From 9e2793f6e4e2ca452457e459f013cc8e6b08a789 Mon Sep 17 00:00:00 2001
From: Dave Gordon <david.s.gordon@intel.com>
Date: Thu, 14 Jul 2016 14:52:03 +0100
Subject: drm/i915: compile-time consistency check on __EXEC_OBJECT flags

Two different sets of flag bits are stored in the 'flags' member of a
'struct drm_i915_gem_exec_object2', and they're defined in two different
source files, increasing the risk of an accidental clash.

Some flags in this field are supplied by the user; these are defined in
i915_drm.h, and they start from the LSB and work up.

Other flags are defined in i915_gem_execbuffer, for internal use within
that file only; they start from the MSB and work down.

So here we add a compile-time check that the two sets of flags do not
overlap, which would cause all sorts of confusion.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1468504324-12690-1-git-send-email-david.s.gordon@intel.com
---
 include/uapi/drm/i915_drm.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d7e81a3886fd..51b9360bb376 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -698,12 +698,13 @@ struct drm_i915_gem_exec_object2 {
 	 */
 	__u64 offset;
 
-#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
-#define EXEC_OBJECT_NEEDS_GTT	(1<<1)
-#define EXEC_OBJECT_WRITE	(1<<2)
+#define EXEC_OBJECT_NEEDS_FENCE		 (1<<0)
+#define EXEC_OBJECT_NEEDS_GTT		 (1<<1)
+#define EXEC_OBJECT_WRITE		 (1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
-#define EXEC_OBJECT_PINNED	(1<<4)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
+#define EXEC_OBJECT_PINNED		 (1<<4)
+/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
+#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
 	__u64 flags;
 
 	__u64 rsvd1;
-- 
cgit v1.2.3


From 3373ce2eccd56651579b1864fecf98b46fd1cb67 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 1 Jul 2016 17:32:08 +0300
Subject: drm/i915: Give proper names to MOCS entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The purpose for each MOCS entry isn't well defined atm. Defining these
is important to remove any uncertainty about the use of these entries
for example in terms of performance and GPU/CPU coherency.

Suggested by Ville.

v4:
- Rename I915_MOCS_AUTO to I915_MOCS_PTE. (Chris)

CC: Rong R Yang <rong.r.yang@intel.com>
CC: Yakui Zhao <yakui.zhao@intel.com>
CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
CC: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1467383528-16142-1-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
 include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 927825f5b284..2280c329d37f 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
  *       end.
  */
 static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-	{ /* 0x00000009 */
+	[I915_MOCS_UNCACHED] = {
+	  /* 0x00000009 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
 			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
@@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 	  /* 0x0010 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
 	},
-	{
+	[I915_MOCS_PTE] = {
 	  /* 0x00000038 */
 	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 	  /* 0x0030 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
 	},
-	{
+	[I915_MOCS_CACHED] = {
 	  /* 0x0000003b */
 	  .control_value = LE_CACHEABILITY(LE_WB) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-	{
+	[I915_MOCS_UNCACHED] = {
 	  /* 0x00000009 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	  /* 0x0010 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
 	},
-	{
+	[I915_MOCS_PTE] = {
 	  /* 0x00000038 */
 	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	  /* 0x0030 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
 	},
-	{
+	[I915_MOCS_CACHED] = {
 	  /* 0x00000039 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 51b9360bb376..33ce5ff9556a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,30 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum i915_mocs_table_index {
+	/*
+	 * Not cached anywhere, coherency between CPU and GPU accesses is
+	 * guaranteed.
+	 */
+	I915_MOCS_UNCACHED,
+	/*
+	 * Cacheability and coherency controlled by the kernel automatically
+	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
+	 * usage of the surface (used for display scanout or not).
+	 */
+	I915_MOCS_PTE,
+	/*
+	 * Cached in all GPU caches available on the platform.
+	 * Coherency between CPU and GPU accesses to the surface is not
+	 * guaranteed without extra synchronization.
+	 */
+	I915_MOCS_CACHED,
+};
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
-- 
cgit v1.2.3


From 91b2db6f65fbbb1a6688bcc2e52596b723ea2472 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 4 Aug 2016 16:32:23 +0100
Subject: drm/i915: Pad GTT views of exec objects up to user specified size

Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as that they require
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.

v2: Long time, no see
v3: Try an anonymous union for uapi struct compatibility

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-7-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 80 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++++++-
 include/uapi/drm/i915_drm.h                |  8 ++-
 4 files changed, 64 insertions(+), 47 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74a31358fd87..1e1369319326 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    u64 size,
 		    u64 alignment,
 		    u64 flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 u64 size,
 			 u64 alignment,
 			 u64 flags);
 
@@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
-	return i915_gem_object_pin(obj, &ggtt->base,
-				   alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
+				   flags | PIN_GLOBAL);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d8e150508db5..b4af5d1c0ff5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1692,7 +1692,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now pin it into the GTT if needed */
-	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
+	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	if (ret)
 		goto unlock;
 
@@ -2956,6 +2956,7 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * @obj: object to bind
  * @vm: address space to bind into
  * @ggtt_view: global gtt view if applicable
+ * @size: requested size in bytes (can be larger than the VMA)
  * @alignment: requested alignment
  * @flags: mask of PIN_* flags to use
  */
@@ -2963,21 +2964,20 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
+			   u64 size,
 			   u64 alignment,
 			   u64 flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	u32 fence_alignment, unfenced_alignment;
-	u32 search_flag, alloc_flag;
 	u64 start, end;
-	u64 size, fence_size;
+	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
 	int ret;
 
 	if (i915_is_ggtt(vm)) {
-		u32 view_size;
+		u32 fence_size, fence_alignment, unfenced_alignment;
+		u64 view_size;
 
 		if (WARN_ON(!ggtt_view))
 			return ERR_PTR(-EINVAL);
@@ -2995,48 +2995,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 								view_size,
 								obj->tiling_mode,
 								false);
-		size = flags & PIN_MAPPABLE ? fence_size : view_size;
+		size = max(size, view_size);
+		if (flags & PIN_MAPPABLE)
+			size = max_t(u64, size, fence_size);
+
+		if (alignment == 0)
+			alignment = flags & PIN_MAPPABLE ? fence_alignment :
+				unfenced_alignment;
+		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+			DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
+				  ggtt_view ? ggtt_view->type : 0,
+				  alignment);
+			return ERR_PTR(-EINVAL);
+		}
 	} else {
-		fence_size = i915_gem_get_gtt_size(dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment =
-			i915_gem_get_gtt_alignment(dev,
-						   obj->base.size,
-						   obj->tiling_mode,
-						   false);
-		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+		size = max_t(u64, size, obj->base.size);
+		alignment = 4096;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	end = vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, ggtt->mappable_end);
+		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
-	if (alignment == 0)
-		alignment = flags & PIN_MAPPABLE ? fence_alignment :
-						unfenced_alignment;
-	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n",
-			  ggtt_view ? ggtt_view->type : 0,
-			  alignment);
-		return ERR_PTR(-EINVAL);
-	}
-
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
+		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
 			  ggtt_view ? ggtt_view->type : 0,
-			  size,
+			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
 		return ERR_PTR(-E2BIG);
@@ -3530,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
 	if (ret)
@@ -3678,12 +3669,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags)
+i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	if (alignment &&
-	    vma->node.start & (alignment - 1))
+	if (vma->node.size < size)
+		return true;
+
+	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -3727,6 +3720,7 @@ static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
+		       u64 size,
 		       u64 alignment,
 		       u64 flags)
 {
@@ -3754,7 +3748,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
-		if (i915_vma_misplaced(vma, alignment, flags)) {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned in %s with incorrect alignment:"
 			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
@@ -3775,8 +3769,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
-						 flags);
+		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+						 size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	} else {
@@ -3798,17 +3792,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    u64 size,
 		    u64 alignment,
 		    u64 flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      alignment, flags);
+				      size, alignment, flags);
 }
 
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 u64 size,
 			 u64 alignment,
 			 u64 flags)
 {
@@ -3819,7 +3815,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	BUG_ON(!view);
 
 	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      alignment, flags | PIN_GLOBAL);
+				      size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 63984c4d8e5a..d2e27e730ecb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	ret = i915_gem_object_pin(obj, vma->vm,
+				  entry->pad_to_size,
+				  entry->alignment,
+				  flags);
 	if ((ret == -ENOSPC  || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->pad_to_size,
 					  entry->alignment,
 					  flags & ~PIN_MAPPABLE);
 	if (ret)
@@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
 	if (entry->flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
@@ -1091,6 +1098,14 @@ validate_exec_list(struct drm_device *dev,
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
+		/* pad_to_size was once a reserved field, so sanitize it */
+		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+			if (offset_in_page(exec[i].pad_to_size))
+				return -EINVAL;
+		} else {
+			exec[i].pad_to_size = 0;
+		}
+
 		/* First check for malicious input causing overflow in
 		 * the worst case where we need to allocate the entire
 		 * relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 33ce5ff9556a..0f292733cffc 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_WRITE		 (1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
+#define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS	(-(EXEC_OBJECT_PINNED<<1))
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
 	__u64 flags;
 
-	__u64 rsvd1;
+	union {
+		__u64 rsvd1;
+		__u64 pad_to_size;
+	};
 	__u64 rsvd2;
 };
 
-- 
cgit v1.2.3


From deeb1519b65a92ca06c8e8554a92df0fdb4d5dea Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 5 Aug 2016 10:14:22 +0100
Subject: drm/i915: Document and reject invalid tiling modes

Through the GTT interface to the fence registers, we can only handle
linear, X and Y tiling. The more esoteric tiling patterns are ignored.
Document that the tiling ABI only supports upto Y tiling, and reject any
attempts to set a tiling mode other than NONE, X or Y.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-17-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_tiling.c | 3 +++
 include/uapi/drm/i915_drm.h            | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index c0e01333bddf..6817f69947d9 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	if (tiling_mode == I915_TILING_NONE)
 		return true;
 
+	if (tiling_mode > I915_TILING_LAST)
+		return false;
+
 	if (IS_GEN2(dev) ||
 	    (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
 		tile_width = 128;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0f292733cffc..452629de7a57 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -926,6 +926,7 @@ struct drm_i915_gem_caching {
 #define I915_TILING_NONE	0
 #define I915_TILING_X		1
 #define I915_TILING_Y		2
+#define I915_TILING_LAST	I915_TILING_Y
 
 #define I915_BIT_6_SWIZZLE_NONE		0
 #define I915_BIT_6_SWIZZLE_9		1
-- 
cgit v1.2.3


From dc31e741db49e35e8b99d293dcc7afbbe9418fa7 Mon Sep 17 00:00:00 2001
From: Mengdong Lin <mengdong.lin@linux.intel.com>
Date: Tue, 26 Jul 2016 14:32:18 +0800
Subject: ASoC: topology: ABI - Add the types for BE DAI

Define the type and ABI struct for Backend DAIs. Add the number of BE DAIs
to manifest, and some reserved fields for future extensions.

Pump the version number to 5.

Topology core will check size of ABI objects to detect version mismatch
between user space and kernel.

Signed-off-by: Guneshwor Singh <guneshwor.o.singh@intel.com>
Signed-off-by: Mengdong Lin <mengdong.lin@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/asoc.h | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index e4701a3c6331..f734bea9a032 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -83,7 +83,7 @@
 #define SND_SOC_TPLG_NUM_TEXTS		16
 
 /* ABI version */
-#define SND_SOC_TPLG_ABI_VERSION	0x4
+#define SND_SOC_TPLG_ABI_VERSION	0x5
 
 /* Max size of TLV data */
 #define SND_SOC_TPLG_TLV_SIZE		32
@@ -105,7 +105,8 @@
 #define SND_SOC_TPLG_TYPE_CODEC_LINK	9
 #define SND_SOC_TPLG_TYPE_BACKEND_LINK	10
 #define SND_SOC_TPLG_TYPE_PDATA		11
-#define SND_SOC_TPLG_TYPE_MAX	SND_SOC_TPLG_TYPE_PDATA
+#define SND_SOC_TPLG_TYPE_BE_DAI	12
+#define SND_SOC_TPLG_TYPE_MAX		SND_SOC_TPLG_TYPE_BE_DAI
 
 /* vendor block IDs - please add new vendor types to end */
 #define SND_SOC_TPLG_TYPE_VENDOR_FW	1000
@@ -124,6 +125,11 @@
 #define SND_SOC_TPLG_TUPLE_TYPE_WORD	4
 #define SND_SOC_TPLG_TUPLE_TYPE_SHORT	5
 
+/* BE DAI flags */
+#define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_RATES         (1 << 0)
+#define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_CHANNELS      (1 << 1)
+#define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_SAMPLEBITS    (1 << 2)
+
 /*
  * Block Header.
  * This header precedes all object and object arrays below.
@@ -285,6 +291,8 @@ struct snd_soc_tplg_manifest {
 	__le32 graph_elems;	/* number of graph elements */
 	__le32 pcm_elems;	/* number of PCM elements */
 	__le32 dai_link_elems;	/* number of DAI link elements */
+	__le32 be_dai_elems;	/* number of BE DAI elements */
+	__le32 reserved[20];	/* reserved for new ABI element types */
 	struct snd_soc_tplg_private priv;
 } __attribute__((packed));
 
@@ -450,4 +458,26 @@ struct snd_soc_tplg_link_config {
 	struct snd_soc_tplg_stream stream[SND_SOC_TPLG_STREAM_CONFIG_MAX]; /* supported configs playback and captrure */
 	__le32 num_streams;     /* number of streams */
 } __attribute__((packed));
+
+/*
+ * Describes SW/FW specific features of BE DAI.
+ *
+ * File block representation for BE DAI :-
+ * +-----------------------------------+-----+
+ * | struct snd_soc_tplg_hdr           |  1  |
+ * +-----------------------------------+-----+
+ * | struct snd_soc_tplg_be_dai        |  N  |
+ * +-----------------------------------+-----+
+ */
+struct snd_soc_tplg_be_dai {
+	__le32 size;            /* in bytes of this structure */
+	char dai_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; /* name - used to match */
+	__le32 dai_id;          /* unique ID - used to match */
+	__le32 playback;        /* supports playback mode */
+	__le32 capture;         /* supports capture mode */
+	struct snd_soc_tplg_stream_caps caps[2]; /* playback and capture for DAI */
+	__le32 flag_mask;       /* bitmask of flags to configure */
+	__le32 flags;           /* SND_SOC_TPLG_DAI_FLGBIT_* */
+	struct snd_soc_tplg_private priv;
+} __attribute__((packed));
 #endif
-- 
cgit v1.2.3


From 4fea83ff0f61676389b17803365c1e8d2b652183 Mon Sep 17 00:00:00 2001
From: Flora Cui <Flora.Cui@amd.com>
Date: Wed, 20 Jul 2016 14:44:38 +0800
Subject: drm/amdgpu: expose AMDGPU_GEM_CREATE_VRAM_CLEARED to user space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

V2: fix the return value for fill failure and validate bo before
filling data

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Flora Cui <Flora.Cui@amd.com>
Reviewed-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 33 ++++++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h              |  2 ++
 2 files changed, 35 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 67de19c46ddb..d8e69a7e51f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -340,11 +340,44 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	if (unlikely(r != 0)) {
 		return r;
 	}
+
+	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
+		struct fence *fence;
+
+		if (adev->mman.buffer_funcs_ring == NULL ||
+		   !adev->mman.buffer_funcs_ring->ready) {
+			r = -EBUSY;
+			goto fail_free;
+		}
+
+		r = amdgpu_bo_reserve(bo, false);
+		if (unlikely(r != 0))
+			goto fail_free;
+
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (unlikely(r != 0))
+			goto fail_unreserve;
+
+		amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+		amdgpu_bo_fence(bo, fence, false);
+		amdgpu_bo_unreserve(bo);
+		fence_put(bo->tbo.moving);
+		bo->tbo.moving = fence_get(fence);
+		fence_put(fence);
+	}
 	*bo_ptr = bo;
 
 	trace_amdgpu_bo_create(bo);
 
 	return 0;
+
+fail_unreserve:
+	amdgpu_bo_unreserve(bo);
+fail_free:
+	amdgpu_bo_unref(&bo);
+	return r;
 }
 
 int amdgpu_bo_create(struct amdgpu_device *adev,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 462246aa200e..a902a602490b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -77,6 +77,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_NO_CPU_ACCESS		(1 << 1)
 /* Flag that USWC attributes should be used for GTT */
 #define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
+/* Flag that the memory should be in VRAM and cleared */
+#define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
cgit v1.2.3


From 07a3061e0832fe22932e0fa977581e45b9c42431 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:35 +0200
Subject: batman-adv: netlink: add routing_algo query

BATADV_CMD_GET_ROUTING_ALGOS is used to get the list of supported routing
algorithms.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven.eckelmann@open-mesh.com: Reduce the number of changes to
BATADV_CMD_GET_ROUTING_ALGOS, fix includes]
Signed-off-by: Sven Eckelmann <sven.eckelmann@open-mesh.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h |  2 ++
 net/batman-adv/bat_algo.c       | 68 +++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/bat_algo.h       |  3 ++
 net/batman-adv/netlink.c        |  9 +++++-
 net/batman-adv/netlink.h        |  3 ++
 5 files changed, 84 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 0fbf6fd4711b..7afce444a64e 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -73,6 +73,7 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device
  * @BATADV_CMD_TP_METER: Start a tp meter session
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
+ * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -81,6 +82,7 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_MESH_INFO,
 	BATADV_CMD_TP_METER,
 	BATADV_CMD_TP_METER_CANCEL,
+	BATADV_CMD_GET_ROUTING_ALGOS,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 81dbbf569bd4..f2cc50d354d9 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -20,12 +20,18 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "netlink.h"
 
 char batadv_routing_algo[20] = "BATMAN_IV";
 static struct hlist_head batadv_algo_list;
@@ -138,3 +144,65 @@ static struct kparam_string batadv_param_string_ra = {
 
 module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
 		0644);
+
+/**
+ * batadv_algo_dump_entry - fill in information about one supported routing
+ *  algorithm
+ * @msg: netlink message to be sent back
+ * @portid: Port to reply to
+ * @seq: Sequence number of message
+ * @bat_algo_ops: Algorithm to be dumped
+ *
+ * Return: Error number, or 0 on success
+ */
+static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				  struct batadv_algo_ops *bat_algo_ops)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_algo_dump - fill in information about supported routing
+ *  algorithms
+ * @msg: netlink message to be sent back
+ * @cb: Parameters to the netlink request
+ *
+ * Return: Length of reply message.
+ */
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_algo_ops *bat_algo_ops;
+	int skip = cb->args[0];
+	int i = 0;
+
+	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+		if (i++ < skip)
+			continue;
+
+		if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					   bat_algo_ops)) {
+			i--;
+			break;
+		}
+	}
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 860d773dd8fa..3b5b69cdd12b 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -22,7 +22,9 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
@@ -31,5 +33,6 @@ void batadv_algo_init(void);
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 231f8eaf075b..19fb2657e274 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -32,13 +32,14 @@
 #include <net/netlink.h>
 #include <uapi/linux/batman_adv.h>
 
+#include "bat_algo.h"
 #include "hard-interface.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
 
 struct sk_buff;
 
-static struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = BATADV_NL_NAME,
@@ -399,6 +400,12 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_cancel,
 	},
+	{
+		.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_algo_dump,
+	},
 };
 
 /**
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 945653ab58c6..b399f49504df 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,6 +21,7 @@
 #include "main.h"
 
 #include <linux/types.h>
+#include <net/genetlink.h>
 
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
@@ -29,4 +30,6 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
 				  u8 result, u32 test_time, u64 total_bytes,
 				  u32 cookie);
 
+extern struct genl_family batadv_netlink_family;
+
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
-- 
cgit v1.2.3


From b60620cf567b79da46096a0ba29b39f23b6e7f1c Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:36 +0200
Subject: batman-adv: netlink: hardif query

BATADV_CMD_GET_HARDIFS will return the list of hardifs (including index,
name and MAC address) of all hardifs for a given softif.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven.eckelmann@open-mesh.com: Reduce the number of changes to
BATADV_CMD_GET_HARDIFS, add policy for attributes]
Signed-off-by: Sven Eckelmann <sven.eckelmann@open-mesh.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h |   4 ++
 net/batman-adv/netlink.c        | 128 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 130 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 7afce444a64e..8abcbca14b6a 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -40,6 +40,7 @@
  * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run
  * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
  * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
+ * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -60,6 +61,7 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_TPMETER_BYTES,
 	BATADV_ATTR_TPMETER_COOKIE,
 	BATADV_ATTR_PAD,
+	BATADV_ATTR_ACTIVE,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -74,6 +76,7 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_TP_METER: Start a tp meter session
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
  * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
+ * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -83,6 +86,7 @@ enum batadv_nl_commands {
 	BATADV_CMD_TP_METER,
 	BATADV_CMD_TP_METER_CANCEL,
 	BATADV_CMD_GET_ROUTING_ALGOS,
+	BATADV_CMD_GET_HARDIFS,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 19fb2657e274..3f872d6eec57 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -26,10 +26,14 @@
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
@@ -37,8 +41,6 @@
 #include "soft-interface.h"
 #include "tp_meter.h"
 
-struct sk_buff;
-
 struct genl_family batadv_netlink_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
@@ -70,8 +72,24 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_TPMETER_TEST_TIME]	= { .type = NLA_U32 },
 	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
 	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ACTIVE]		= { .type = NLA_FLAG },
 };
 
+/**
+ * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * @nlh: Message header
+ * @attrtype: Attribute which holds an interface index
+ *
+ * Return: interface index, or 0.
+ */
+static int
+batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
+{
+	struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
+
+	return attr ? nla_get_u32(attr) : 0;
+}
+
 /**
  * batadv_netlink_mesh_info_put - fill in generic information about mesh
  *  interface
@@ -381,6 +399,106 @@ out:
 	return ret;
 }
 
+/**
+ * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hard_iface: Hard interface to dump
+ *
+ * Return: error code, or 0 on success
+ */
+static int
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_hard_iface *hard_iface)
+{
+	struct net_device *net_dev = hard_iface->net_dev;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_HARDIFS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			net_dev->ifindex) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   net_dev->name) ||
+	    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+		    net_dev->dev_addr))
+		goto nla_put_failure;
+
+	if (hard_iface->if_status == BATADV_IF_ACTIVE) {
+		if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
+			goto nla_put_failure;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_dump_hardifs - Dump all hard interface into a messages
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: error code, or length of reply message on success
+ */
+static int
+batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hard_iface *hard_iface;
+	int ifindex;
+	int portid = NETLINK_CB(cb->skb).portid;
+	int seq = cb->nlh->nlmsg_seq;
+	int skip = cb->args[0];
+	int i = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface)
+		return -ENODEV;
+
+	if (!batadv_softif_is_valid(soft_iface)) {
+		dev_put(soft_iface);
+		return -ENODEV;
+	}
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
+			continue;
+
+		if (i++ < skip)
+			continue;
+
+		if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+						     hard_iface)) {
+			i--;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+
+	dev_put(soft_iface);
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
+
 static struct genl_ops batadv_netlink_ops[] = {
 	{
 		.cmd = BATADV_CMD_GET_MESH_INFO,
@@ -406,6 +524,12 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_algo_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_HARDIFS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_netlink_dump_hardifs,
+	},
 };
 
 /**
-- 
cgit v1.2.3


From d34f05507db245bef819b684ad84f9e0f9bb003d Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:37 +0200
Subject: batman-adv: netlink: add translation table query

This adds the commands BATADV_CMD_GET_TRANSTABLE_LOCAL and
BATADV_CMD_GET_TRANSTABLE_GLOBAL, which correspond to the transtable_local
and transtable_global debugfs files.

The batadv_tt_client_flags enum is moved to the UAPI to expose it as part
of the netlink API.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven.eckelmann@open-mesh.com: add policy for attributes, fix includes]
Signed-off-by: Sven Eckelmann <sven.eckelmann@open-mesh.com>
[sw@simonwunderlich.de: fix VID attributes content]
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h    |  56 ++++++
 net/batman-adv/netlink.c           |  23 ++-
 net/batman-adv/netlink.h           |   3 +
 net/batman-adv/packet.h            |  36 ----
 net/batman-adv/translation-table.c | 377 +++++++++++++++++++++++++++++++++++++
 net/batman-adv/translation-table.h |   4 +
 6 files changed, 462 insertions(+), 37 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 8abcbca14b6a..1168d058cd2b 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -22,6 +22,42 @@
 
 #define BATADV_NL_MCAST_GROUP_TPMETER	"tpmeter"
 
+/**
+ * enum batadv_tt_client_flags - TT client specific flags
+ * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
+ * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
+ *  update telling its new real location has not been received/sent yet
+ * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
+ *  This information is used by the "AP Isolation" feature
+ * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
+ *  information is used by the Extended Isolation feature
+ * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
+ * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
+ *  not been announced yet
+ * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
+ *  in the table for one more originator interval for consistency purposes
+ * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
+ *  the network but no nnode has already announced it
+ *
+ * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
+ * Bits from 8 to 15 are called _local flags_ because they are used for local
+ * computations only.
+ *
+ * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
+ * the other nodes in the network. To achieve this goal these flags are included
+ * in the TT CRC computation.
+ */
+enum batadv_tt_client_flags {
+	BATADV_TT_CLIENT_DEL     = (1 << 0),
+	BATADV_TT_CLIENT_ROAM    = (1 << 1),
+	BATADV_TT_CLIENT_WIFI    = (1 << 4),
+	BATADV_TT_CLIENT_ISOLA	 = (1 << 5),
+	BATADV_TT_CLIENT_NOPURGE = (1 << 8),
+	BATADV_TT_CLIENT_NEW     = (1 << 9),
+	BATADV_TT_CLIENT_PENDING = (1 << 10),
+	BATADV_TT_CLIENT_TEMP	 = (1 << 11),
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  *
@@ -41,6 +77,14 @@
  * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
  * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
  * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active
+ * @BATADV_ATTR_TT_ADDRESS: Client MAC address
+ * @BATADV_ATTR_TT_TTVN: Translation table version
+ * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version
+ * @BATADV_ATTR_TT_CRC32: CRC32 over translation table
+ * @BATADV_ATTR_TT_VID: VLAN ID
+ * @BATADV_ATTR_TT_FLAGS: Translation table client flags
+ * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best
+ * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -62,6 +106,14 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_TPMETER_COOKIE,
 	BATADV_ATTR_PAD,
 	BATADV_ATTR_ACTIVE,
+	BATADV_ATTR_TT_ADDRESS,
+	BATADV_ATTR_TT_TTVN,
+	BATADV_ATTR_TT_LAST_TTVN,
+	BATADV_ATTR_TT_CRC32,
+	BATADV_ATTR_TT_VID,
+	BATADV_ATTR_TT_FLAGS,
+	BATADV_ATTR_FLAG_BEST,
+	BATADV_ATTR_LAST_SEEN_MSECS,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -77,6 +129,8 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
  * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
  * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+ * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
+ * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -87,6 +141,8 @@ enum batadv_nl_commands {
 	BATADV_CMD_TP_METER_CANCEL,
 	BATADV_CMD_GET_ROUTING_ALGOS,
 	BATADV_CMD_GET_HARDIFS,
+	BATADV_CMD_GET_TRANSTABLE_LOCAL,
+	BATADV_CMD_GET_TRANSTABLE_GLOBAL,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 3f872d6eec57..14360ec16513 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -40,6 +40,7 @@
 #include "hard-interface.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
+#include "translation-table.h"
 
 struct genl_family batadv_netlink_family = {
 	.id = GENL_ID_GENERATE,
@@ -73,6 +74,14 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
 	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
 	[BATADV_ATTR_ACTIVE]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_TT_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TT_TTVN]		= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_LAST_TTVN]	= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_CRC32]		= { .type = NLA_U32 },
+	[BATADV_ATTR_TT_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_TT_FLAGS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_FLAG_BEST]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
 };
 
 /**
@@ -82,7 +91,7 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
  *
  * Return: interface index, or 0.
  */
-static int
+int
 batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 {
 	struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
@@ -530,6 +539,18 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_netlink_dump_hardifs,
 	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_local_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_global_dump,
+	},
 };
 
 /**
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index b399f49504df..52eb16281aba 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -23,8 +23,11 @@
 #include <linux/types.h>
 #include <net/genetlink.h>
 
+struct nlmsghdr;
+
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
+int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
 
 int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
 				  u8 result, u32 test_time, u64 total_bytes,
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6b011ff64dd8..6afc0b86950e 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -128,42 +128,6 @@ enum batadv_tt_data_flags {
 	BATADV_TT_FULL_TABLE = BIT(4),
 };
 
-/**
- * enum batadv_tt_client_flags - TT client specific flags
- * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
- * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
- *  update telling its new real location has not been received/sent yet
- * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
- *  This information is used by the "AP Isolation" feature
- * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
- *  information is used by the Extended Isolation feature
- * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
- * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
- *  not been announced yet
- * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
- *  in the table for one more originator interval for consistency purposes
- * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
- *  the network but no nnode has already announced it
- *
- * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
- * Bits from 8 to 15 are called _local flags_ because they are used for local
- * computations only.
- *
- * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
- * the other nodes in the network. To achieve this goal these flags are included
- * in the TT CRC computation.
- */
-enum batadv_tt_client_flags {
-	BATADV_TT_CLIENT_DEL     = BIT(0),
-	BATADV_TT_CLIENT_ROAM    = BIT(1),
-	BATADV_TT_CLIENT_WIFI    = BIT(4),
-	BATADV_TT_CLIENT_ISOLA	 = BIT(5),
-	BATADV_TT_CLIENT_NOPURGE = BIT(8),
-	BATADV_TT_CLIENT_NEW     = BIT(9),
-	BATADV_TT_CLIENT_PENDING = BIT(10),
-	BATADV_TT_CLIENT_TEMP	 = BIT(11),
-};
-
 /**
  * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
  * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index af2bfef6dca8..20804078293c 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -37,20 +37,27 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "soft-interface.h"
@@ -1108,6 +1115,164 @@ out:
 	return 0;
 }
 
+/**
+ * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * @msg :Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_tt_common_entry *common)
+{
+	void *hdr;
+	struct batadv_softif_vlan *vlan;
+	struct batadv_tt_local_entry *local;
+	unsigned int last_seen_msecs;
+	u32 crc;
+
+	local = container_of(common, struct batadv_tt_local_entry, common);
+	last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen);
+
+	vlan = batadv_softif_vlan_get(bat_priv, common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_softif_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_LOCAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) &&
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the local tt entries
+ * @idx_s: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct hlist_head *head, int *idx_s)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+					       common)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_local_dump - Dump TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or 0 on success
+ */
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.local_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
+						bat_priv, head, &idx))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	return ret;
+}
+
 static void
 batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
 			    struct batadv_tt_local_entry *tt_local_entry,
@@ -1755,6 +1920,218 @@ out:
 	return 0;
 }
 
+/**
+ * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @common: tt local & tt global common data
+ * @orig: Originator node announcing a non-mesh client
+ * @best: Is the best originator for the TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_tt_common_entry *common,
+			       struct batadv_tt_orig_list_entry *orig,
+			       bool best)
+{
+	void *hdr;
+	struct batadv_orig_node_vlan *vlan;
+	u8 last_ttvn;
+	u32 crc;
+
+	vlan = batadv_orig_node_vlan_get(orig->orig_node,
+					 common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_orig_node_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_GLOBAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	last_ttvn = atomic_read(&orig->orig_node->last_ttvn);
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig->orig_node->orig) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ * @sub_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_tt_common_entry *common, int *sub_s)
+{
+	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+	struct batadv_tt_global_entry *global;
+	struct hlist_head *head;
+	int sub = 0;
+	bool best;
+
+	global = container_of(common, struct batadv_tt_global_entry, common);
+	best_entry = batadv_transtable_best_orig(bat_priv, global);
+	head = &global->orig_list;
+
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (orig_entry == best_entry);
+
+		if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+						   orig_entry, best)) {
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_priv *bat_priv,
+			     struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+						common, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump -  Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.global_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_global_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq, bat_priv,
+						 head, &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+
+	return ret;
+}
+
 /**
  * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
  * @tt_global_entry: the global entry to remove the orig_entry from
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 02b0f85527cc..783fdba84db2 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -22,8 +22,10 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct net_device;
 struct seq_file;
+struct sk_buff;
 
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
@@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv,
 			   const char *message, bool roaming);
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb);
 void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 			       struct batadv_orig_node *orig_node,
 			       s32 match_vid, const char *message);
-- 
cgit v1.2.3


From 85cf8c859d53f6f53c37bb7f23a41f6171427021 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:39 +0200
Subject: batman-adv: netlink: add originator and neighbor table queries

Add BATADV_CMD_GET_ORIGINATORS and BATADV_CMD_GET_NEIGHBORS commands,
using handlers bat_orig_dump and bat_neigh_dump in batadv_algo_ops. Will
always return -EOPNOTSUPP for now, as no implementations exist yet.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven@narfation.org: Rewrite based on new algo_ops structures]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h |   4 +
 net/batman-adv/netlink.c        |  13 ++++
 net/batman-adv/originator.c     | 160 ++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/originator.h     |   4 +
 net/batman-adv/types.h          |   9 +++
 5 files changed, 190 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 1168d058cd2b..3f7a415f5e09 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -131,6 +131,8 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
  * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
  * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations
+ * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
+ * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -143,6 +145,8 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_HARDIFS,
 	BATADV_CMD_GET_TRANSTABLE_LOCAL,
 	BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+	BATADV_CMD_GET_ORIGINATORS,
+	BATADV_CMD_GET_NEIGHBORS,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c0d20bb92ce..8469fc4ec5a3 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -39,6 +39,7 @@
 
 #include "bat_algo.h"
 #include "hard-interface.h"
+#include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
 #include "translation-table.h"
@@ -554,6 +555,18 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_tt_global_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_ORIGINATORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_orig_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_NEIGHBORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_hardif_neigh_dump,
+	},
 };
 
 /**
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3940b5d24421..95c85558c530 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,11 +28,15 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "distributed-arp-table.h"
@@ -42,8 +46,10 @@
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 
 /* hash class keys */
@@ -720,6 +726,83 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
 	return 0;
 }
 
+/**
+ * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->neigh.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 /**
  * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
  *  free after rcu grace period
@@ -1330,6 +1413,83 @@ out:
 	return 0;
 }
 
+/**
+ * batadv_orig_dump - Dump to netlink the originator infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->orig.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
 {
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 566306bf05dc..ebc56183f358 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -31,7 +31,9 @@
 
 #include "hash.h"
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
@@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
 			struct batadv_hard_iface *if_outgoing);
 void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
 
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
 
 struct batadv_orig_ifinfo *
@@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
 void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
 
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 72806a3c40df..968023a61598 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -1418,6 +1419,7 @@ struct batadv_algo_iface_ops {
  * @is_similar_or_better: check if neigh1 is equally similar or better than
  *  neigh2 for their respective outgoing interface from the metric prospective
  * @print: print the single hop neighbor list (optional)
+ * @dump: dump neighbors to a netlink socket (optional)
  */
 struct batadv_algo_neigh_ops {
 	void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
@@ -1430,6 +1432,9 @@ struct batadv_algo_neigh_ops {
 				     struct batadv_neigh_node *neigh2,
 				     struct batadv_hard_iface *if_outgoing2);
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
 };
 
 /**
@@ -1441,6 +1446,7 @@ struct batadv_algo_neigh_ops {
  * @del_if: ask the routing algorithm to apply the needed changes to the
  *  orig_node due to an hard-interface being removed from the mesh (optional)
  * @print: print the originator table (optional)
+ * @dump: dump originators to a netlink socket (optional)
  */
 struct batadv_algo_orig_ops {
 	void (*free)(struct batadv_orig_node *orig_node);
@@ -1449,6 +1455,9 @@ struct batadv_algo_orig_ops {
 		      int del_if_num);
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq,
 		      struct batadv_hard_iface *hard_iface);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
 };
 
 /**
-- 
cgit v1.2.3


From 024f99cb4acc14dab5e55e1ecdf6aad31269ca98 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:40 +0200
Subject: batman-adv: add B.A.T.M.A.N. IV bat_{orig, neigh}_dump
 implementations

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven.eckelmann@open-mesh.com: Fix function parameter alignments,
add policy for attributes, fix includes, fix algo_ops integration]
Signed-off-by: Sven Eckelmann <sven.eckelmann@open-mesh.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h |   4 +
 net/batman-adv/bat_iv_ogm.c     | 366 ++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/netlink.c        |   2 +
 3 files changed, 372 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 3f7a415f5e09..ba2359a1f464 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -85,6 +85,8 @@ enum batadv_tt_client_flags {
  * @BATADV_ATTR_TT_FLAGS: Translation table client flags
  * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best
  * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
+ * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
+ * @BATADV_ATTR_TQ: TQ to neighbour
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -114,6 +116,8 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_TT_FLAGS,
 	BATADV_ATTR_FLAG_BEST,
 	BATADV_ATTR_LAST_SEEN_MSECS,
+	BATADV_ATTR_NEIGH_ADDRESS,
+	BATADV_ATTR_TQ,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a40cdf273625..7a8c0f63e2ae 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/pkt_sched.h>
 #include <linux/printk.h>
 #include <linux/random.h>
@@ -48,6 +49,9 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bitarray.h"
@@ -55,6 +59,7 @@
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
@@ -1947,6 +1952,235 @@ next:
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ *  given outgoing interface.
+ * @neigh_node: Neighbour of interest
+ * @if_outgoing: Outgoing interface of interest
+ * @tq_avg: Pointer of where to store the TQ average
+ *
+ * Return: False if no average TQ available, otherwise true.
+ */
+static bool
+batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
+			       struct batadv_hard_iface *if_outgoing,
+			       u8 *tq_avg)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return false;
+
+	*tq_avg = n_ifinfo->bat_iv.tq_avg;
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	return true;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *if_outgoing,
+				 struct batadv_orig_node *orig_node,
+				 struct batadv_neigh_node *neigh_node,
+				 bool best)
+{
+	void *hdr;
+	u8 tq_avg;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg))
+		return 0;
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			      struct batadv_priv *bat_priv,
+			      struct batadv_hard_iface *if_outgoing,
+			      struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+	u8 tq_avg_best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing,
+					    &tq_avg_best))
+		goto out;
+
+	if (tq_avg_best == 0)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq,
+						     bat_priv, if_outgoing,
+						     orig_node, neigh_node,
+						     best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_priv *bat_priv,
+			       struct batadv_hard_iface *if_outgoing,
+			       struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv,
+						  if_outgoing, orig_node,
+						  sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			struct batadv_priv *bat_priv,
+			struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_iv_ogm_orig_dump_bucket(msg, portid,
+						   cb->nlh->nlmsg_seq,
+						   bat_priv, if_outgoing, head,
+						   &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
+
 /**
  * batadv_iv_hardif_neigh_print - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
@@ -2043,6 +2277,136 @@ out:
 	return ret;
 }
 
+/**
+ * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ *  into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: Hard interface to dump the neighbours for
+ * @idx_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_priv *bat_priv,
+				struct batadv_hard_iface *hard_iface,
+				int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq,
+						   hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dump to this hard interfaace
+ */
+static void
+batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    single_hardif,
+							    &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
+					list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    hard_iface, &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
+
 /**
  * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
  * @neigh1: the first neighbor object of the comparison
@@ -2330,9 +2694,11 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 		.cmp = batadv_iv_ogm_neigh_cmp,
 		.is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
 		.print = batadv_iv_neigh_print,
+		.dump = batadv_iv_ogm_neigh_dump,
 	},
 	.orig = {
 		.print = batadv_iv_ogm_orig_print,
+		.dump = batadv_iv_ogm_orig_dump,
 		.free = batadv_iv_ogm_orig_free,
 		.add_if = batadv_iv_ogm_orig_add_if,
 		.del_if = batadv_iv_ogm_orig_del_if,
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 8469fc4ec5a3..0c7940cc1968 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -84,6 +84,8 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_TT_FLAGS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_FLAG_BEST]		= { .type = NLA_FLAG },
 	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
+	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
 };
 
 /**
-- 
cgit v1.2.3


From f02a478f518ee5690f279c8c2d3a6222143a7b20 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 3 Jul 2016 13:31:41 +0200
Subject: batman-adv: add B.A.T.M.A.N. V bat_{orig, neigh}_dump implementations

Dump the algo V originators and neighbours.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven@narfation.org: Fix includes, fix algo_ops integration]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h |   2 +
 net/batman-adv/bat_v.c          | 340 ++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/netlink.c        |   1 +
 3 files changed, 343 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index ba2359a1f464..2e2747fb1311 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -87,6 +87,7 @@ enum batadv_tt_client_flags {
  * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
  * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
  * @BATADV_ATTR_TQ: TQ to neighbour
+ * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -118,6 +119,7 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_LAST_SEEN_MSECS,
 	BATADV_ATTR_NEIGH_ADDRESS,
 	BATADV_ATTR_TQ,
+	BATADV_ATTR_THROUGHPUT,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 1d777b171366..9dccfaf32115 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -22,17 +22,22 @@
 #include <linux/bug.h>
 #include <linux/cache.h>
 #include <linux/errno.h>
+#include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bat_v_elp.h"
@@ -42,9 +47,12 @@
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 
+struct sk_buff;
+
 static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -205,6 +213,138 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to dump
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+	throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
+	throughput = throughput * 100;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_neigh_dump_hardif - Dump the  neighbours of a hard interface  into
+ *  a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: The hard interface to be dumped
+ * @idx_s: Entries to be skipped
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_hard_iface *hard_iface,
+			   int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_neigh_dump - Dump the neighbours of a hard interface  into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dumping to this hard interface
+ */
+static void
+batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		    struct batadv_priv *bat_priv,
+		    struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, single_hardif,
+						       &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, hard_iface,
+						       &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
+
 /**
  * batadv_v_orig_print - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
@@ -272,6 +412,204 @@ next:
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_v_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_hard_iface *if_outgoing,
+			    struct batadv_orig_node *orig_node,
+			    struct batadv_neigh_node *neigh_node,
+			    bool best)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+	void *hdr;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return 0;
+
+	throughput = n_ifinfo->bat_v.throughput * 100;
+
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *if_outgoing,
+			 struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv,
+						if_outgoing, orig_node,
+						neigh_node, best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_priv *bat_priv,
+			  struct batadv_hard_iface *if_outgoing,
+			  struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv,
+					     if_outgoing, orig_node, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		   struct batadv_priv *bat_priv,
+		   struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_v_orig_dump_bucket(msg, portid,
+					      cb->nlh->nlmsg_seq,
+					      bat_priv, if_outgoing, head, &idx,
+					      &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
+
 static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
 			      struct batadv_hard_iface *if_outgoing1,
 			      struct batadv_neigh_node *neigh2,
@@ -573,9 +911,11 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 		.cmp = batadv_v_neigh_cmp,
 		.is_similar_or_better = batadv_v_neigh_is_sob,
 		.print = batadv_v_neigh_print,
+		.dump = batadv_v_neigh_dump,
 	},
 	.orig = {
 		.print = batadv_v_orig_print,
+		.dump = batadv_v_orig_dump,
 	},
 	.gw = {
 		.store_sel_class = batadv_v_store_sel_class,
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c7940cc1968..025f2ec2b27e 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -86,6 +86,7 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
 	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
 	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
+	[BATADV_ATTR_THROUGHPUT]	= { .type = NLA_U32 },
 };
 
 /**
-- 
cgit v1.2.3


From d7129dafcb71adfd1a166d0477ce0f564cf410d5 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 3 Jul 2016 13:31:42 +0200
Subject: batman-adv: netlink: add gateway table queries

Add BATADV_CMD_GET_GATEWAYS commands, using handlers bat_gw_dump in
batadv_algo_ops. Will always return -EOPNOTSUPP for now, as no
implementations exist yet.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  8 ++++++
 net/batman-adv/gateway_client.c | 59 +++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/gateway_client.h |  2 ++
 net/batman-adv/netlink.c        | 10 +++++++
 net/batman-adv/types.h          |  3 +++
 5 files changed, 82 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 2e2747fb1311..a13fc09e8192 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -88,6 +88,9 @@ enum batadv_tt_client_flags {
  * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
  * @BATADV_ATTR_TQ: TQ to neighbour
  * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour
+ * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth
+ * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth
+ * @BATADV_ATTR_ROUTER: Gateway router MAC address
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -120,6 +123,9 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_NEIGH_ADDRESS,
 	BATADV_ATTR_TQ,
 	BATADV_ATTR_THROUGHPUT,
+	BATADV_ATTR_BANDWIDTH_UP,
+	BATADV_ATTR_BANDWIDTH_DOWN,
+	BATADV_ATTR_ROUTER,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -139,6 +145,7 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations
  * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
  * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
+ * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -153,6 +160,7 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_TRANSTABLE_GLOBAL,
 	BATADV_CMD_GET_ORIGINATORS,
 	BATADV_CMD_GET_NEIGHBORS,
+	BATADV_CMD_GET_GATEWAYS,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index a77a17939f1e..c2928c2287b8 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -20,6 +20,7 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
@@ -31,6 +32,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -39,13 +41,17 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/udp.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -500,6 +506,59 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 	return 0;
 }
 
+/**
+ * batadv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ *
+ * Return: Error code, or length of message
+ */
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	int ret;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (!bat_priv->algo_ops->gw.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->gw.dump(msg, cb, bat_priv);
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 /**
  * batadv_gw_dhcp_recipient_get - check if a packet is a DHCP message
  * @skb: the packet to check
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 6b40432aa1ed..859166d03561 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct batadv_tvlv_gateway_data;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -43,6 +44,7 @@ void batadv_gw_node_put(struct batadv_gw_node *gw_node);
 struct batadv_gw_node *
 batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
 enum batadv_dhcp_recipient
 batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 025f2ec2b27e..c68ccb03634d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -38,6 +38,7 @@
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
 #include "originator.h"
 #include "soft-interface.h"
@@ -87,6 +88,9 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
 	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
 	[BATADV_ATTR_THROUGHPUT]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_UP]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_DOWN]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ROUTER]		= { .len = ETH_ALEN },
 };
 
 /**
@@ -570,6 +574,12 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_hardif_neigh_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_GATEWAYS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_gw_dump,
+	},
 };
 
 /**
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 968023a61598..b5f01a36ec34 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1469,6 +1469,7 @@ struct batadv_algo_orig_ops {
  * @is_eligible: check if a newly discovered GW is a potential candidate for
  *  the election as best GW (optional)
  * @print: print the gateway table (optional)
+ * @dump: dump gateways to a netlink socket (optional)
  */
 struct batadv_algo_gw_ops {
 	ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
@@ -1480,6 +1481,8 @@ struct batadv_algo_gw_ops {
 			    struct batadv_orig_node *curr_gw_orig,
 			    struct batadv_orig_node *orig_node);
 	void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv);
 };
 
 /**
-- 
cgit v1.2.3


From 04f3f5bf1883fbe0acba5c1fc698cf5cedebc5c5 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 3 Jul 2016 13:31:45 +0200
Subject: batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink

Dump the list of bridge loop avoidance claims via the netlink socket.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[sven.eckelmann@open-mesh.com: add policy for attributes, fix includes, fix
soft_iface reference leak]
Signed-off-by: Sven Eckelmann <sven.eckelmann@open-mesh.com>
[sw@simonwunderlich.de: fix kerneldoc, fix error reporting]
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h        |  12 +++
 net/batman-adv/bridge_loop_avoidance.c | 169 +++++++++++++++++++++++++++++++++
 net/batman-adv/bridge_loop_avoidance.h |  10 +-
 net/batman-adv/netlink.c               |  12 +++
 4 files changed, 202 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index a13fc09e8192..96b37ab2e840 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -91,6 +91,11 @@ enum batadv_tt_client_flags {
  * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth
  * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth
  * @BATADV_ATTR_ROUTER: Gateway router MAC address
+ * @BATADV_ATTR_BLA_OWN: Flag indicating own originator
+ * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address
+ * @BATADV_ATTR_BLA_VID: BLA VLAN ID
+ * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address
+ * @BATADV_ATTR_BLA_CRC: BLA CRC
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -126,6 +131,11 @@ enum batadv_nl_attrs {
 	BATADV_ATTR_BANDWIDTH_UP,
 	BATADV_ATTR_BANDWIDTH_DOWN,
 	BATADV_ATTR_ROUTER,
+	BATADV_ATTR_BLA_OWN,
+	BATADV_ATTR_BLA_ADDRESS,
+	BATADV_ATTR_BLA_VID,
+	BATADV_ATTR_BLA_BACKBONE,
+	BATADV_ATTR_BLA_CRC,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -146,6 +156,7 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
  * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
  * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
+ * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -161,6 +172,7 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_ORIGINATORS,
 	BATADV_CMD_GET_NEIGHBORS,
 	BATADV_CMD_GET_GATEWAYS,
+	BATADV_CMD_GET_BLA_CLAIM,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index c75ef648f0fd..aafa88f3e98d 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -45,12 +46,18 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -2051,6 +2058,168 @@ out:
 	return 0;
 }
 
+/**
+ * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @claim: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_hard_iface *primary_if,
+			    struct batadv_bla_claim *claim)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(claim->backbone_gw->orig,
+				    primary_addr);
+
+	spin_lock_bh(&claim->backbone_gw->crc_lock);
+	backbone_crc = claim->backbone_gw->crc;
+	spin_unlock_bh(&claim->backbone_gw->crc_lock);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) ||
+	    nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    claim->backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_hard_iface *primary_if,
+			     struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_claim *claim;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(claim, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_claim_dump_entry(msg, portid, seq,
+						primary_if, claim)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.claim_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_claim_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq,
+						 primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 /**
  * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
  *  file
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 0f01daeb359e..a80b9e96f28e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct net_device;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -35,6 +36,7 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
 			       struct batadv_orig_node *orig_node,
 			       int hdr_size);
 int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
 					     void *offset);
 bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
@@ -47,7 +49,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 void batadv_bla_status_update(struct net_device *net_dev);
 int batadv_bla_init(struct batadv_priv *bat_priv);
 void batadv_bla_free(struct batadv_priv *bat_priv);
-
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 #define BATADV_BLA_CRC_INIT	0
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
@@ -112,6 +114,12 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv)
 {
 }
 
+static inline int batadv_bla_claim_dump(struct sk_buff *msg,
+					struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index c68ccb03634d..b33675cbaecf 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -38,6 +38,7 @@
 #include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
 #include "originator.h"
@@ -91,6 +92,11 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_BANDWIDTH_UP]	= { .type = NLA_U32 },
 	[BATADV_ATTR_BANDWIDTH_DOWN]	= { .type = NLA_U32 },
 	[BATADV_ATTR_ROUTER]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_OWN]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_BLA_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_BLA_BACKBONE]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_CRC]		= { .type = NLA_U16 },
 };
 
 /**
@@ -580,6 +586,12 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_gw_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_CLAIM,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_claim_dump,
+	},
 };
 
 /**
-- 
cgit v1.2.3


From ea4152e1171604f325f1a5f080190823a0edbc1f Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Sun, 3 Jul 2016 13:31:47 +0200
Subject: batman-adv: add backbone table netlink support

Dump the list of bridge loop avoidance backbones via the netlink socket.

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 include/uapi/linux/batman_adv.h        |   2 +
 net/batman-adv/bridge_loop_avoidance.c | 164 +++++++++++++++++++++++++++++++++
 net/batman-adv/bridge_loop_avoidance.h |   7 ++
 net/batman-adv/netlink.c               |   7 ++
 4 files changed, 180 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 96b37ab2e840..734fe83ab645 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -157,6 +157,7 @@ enum batadv_nl_attrs {
  * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
  * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
  * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims
+ * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -173,6 +174,7 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_NEIGHBORS,
 	BATADV_CMD_GET_GATEWAYS,
 	BATADV_CMD_GET_BLA_CLAIM,
+	BATADV_CMD_GET_BLA_BACKBONE,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index aafa88f3e98d..35ed1d32bab5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2283,3 +2283,167 @@ out:
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+
+/**
+ * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @backbone_gw: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hard_iface *primary_if,
+			       struct batadv_bla_backbone_gw *backbone_gw)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	int msecs;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
+
+	spin_lock_bh(&backbone_gw->crc_lock);
+	backbone_crc = backbone_gw->crc;
+	spin_unlock_bh(&backbone_gw->crc_lock);
+
+	msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_hard_iface *primary_if,
+				struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_backbone_gw *backbone_gw;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_backbone_dump_entry(msg, portid, seq,
+						   primary_if, backbone_gw)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.backbone_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_backbone_dump_bucket(msg, portid,
+						    cb->nlh->nlmsg_seq,
+						    primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index a80b9e96f28e..1ae93e46fb98 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -39,6 +39,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
 					     void *offset);
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
 				    unsigned short vid);
 bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
@@ -120,6 +121,12 @@ static inline int batadv_bla_claim_dump(struct sk_buff *msg,
 	return -EOPNOTSUPP;
 }
 
+static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
+					   struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 464de9d05135..c3f68167591d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -600,6 +600,13 @@ static struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_bla_claim_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_BACKBONE,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_backbone_dump,
+	},
+
 };
 
 /**
-- 
cgit v1.2.3


From f837297ad82480024d3ad08cd84f6670bcafa862 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer@amd.com>
Date: Mon, 8 Aug 2016 16:23:39 +0900
Subject: drm: Add DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These flags allow userspace to explicitly specify the target vertical
blank period when a flip should take effect.

v2:
* Add new struct drm_mode_crtc_page_flip_target instead of modifying
  struct drm_mode_crtc_page_flip, to make sure all existing userspace
  code keeps compiling (Daniel Vetter)

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_crtc.c  | 48 ++++++++++++++++++++++++++++++++++++++-------
 drivers/gpu/drm/drm_ioctl.c |  8 ++++++++
 include/uapi/drm/drm.h      |  1 +
 include/uapi/drm/drm_mode.h | 39 +++++++++++++++++++++++++++++++++---
 4 files changed, 86 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index a316f7336aeb..93b9821f7f24 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -5398,15 +5398,23 @@ out:
 int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			     void *data, struct drm_file *file_priv)
 {
-	struct drm_mode_crtc_page_flip *page_flip = data;
+	struct drm_mode_crtc_page_flip_target *page_flip = data;
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL;
 	struct drm_pending_vblank_event *e = NULL;
-	u32 target_vblank = 0;
+	u32 target_vblank = page_flip->sequence;
 	int ret = -EINVAL;
 
-	if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
-	    page_flip->reserved != 0)
+	if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS)
+		return -EINVAL;
+
+	if (page_flip->sequence != 0 && !(page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET))
+		return -EINVAL;
+
+	/* Only one of the DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags
+	 * can be specified
+	 */
+	if ((page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) == DRM_MODE_PAGE_FLIP_TARGET)
 		return -EINVAL;
 
 	if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip)
@@ -5417,15 +5425,41 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 		return -ENOENT;
 
 	if (crtc->funcs->page_flip_target) {
+		u32 current_vblank;
 		int r;
 
 		r = drm_crtc_vblank_get(crtc);
 		if (r)
 			return r;
 
-		target_vblank = drm_crtc_vblank_count(crtc) +
-			!(page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC);
-	} else if (crtc->funcs->page_flip == NULL) {
+		current_vblank = drm_crtc_vblank_count(crtc);
+
+		switch (page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) {
+		case DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE:
+			if ((int)(target_vblank - current_vblank) > 1) {
+				DRM_DEBUG("Invalid absolute flip target %u, "
+					  "must be <= %u\n", target_vblank,
+					  current_vblank + 1);
+				drm_crtc_vblank_put(crtc);
+				return -EINVAL;
+			}
+			break;
+		case DRM_MODE_PAGE_FLIP_TARGET_RELATIVE:
+			if (target_vblank != 0 && target_vblank != 1) {
+				DRM_DEBUG("Invalid relative flip target %u, "
+					  "must be 0 or 1\n", target_vblank);
+				drm_crtc_vblank_put(crtc);
+				return -EINVAL;
+			}
+			target_vblank += current_vblank;
+			break;
+		default:
+			target_vblank = current_vblank +
+				!(page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC);
+			break;
+		}
+	} else if (crtc->funcs->page_flip == NULL ||
+		   (page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET)) {
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 33af4a5ddca1..0099d2a4d618 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -228,6 +228,7 @@ static int drm_getstats(struct drm_device *dev, void *data,
 static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_get_cap *req = data;
+	struct drm_crtc *crtc;
 
 	req->value = 0;
 	switch (req->capability) {
@@ -254,6 +255,13 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
 	case DRM_CAP_ASYNC_PAGE_FLIP:
 		req->value = dev->mode_config.async_page_flip;
 		break;
+	case DRM_CAP_PAGE_FLIP_TARGET:
+		req->value = 1;
+		drm_for_each_crtc(crtc, dev) {
+			if (!crtc->funcs->page_flip_target)
+				req->value = 0;
+		}
+		break;
 	case DRM_CAP_CURSOR_WIDTH:
 		if (dev->mode_config.cursor_width)
 			req->value = dev->mode_config.cursor_width;
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 452675fb55d9..b2c52843bc70 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -646,6 +646,7 @@ struct drm_gem_open {
 #define DRM_CAP_CURSOR_WIDTH		0x8
 #define DRM_CAP_CURSOR_HEIGHT		0x9
 #define DRM_CAP_ADDFB2_MODIFIERS	0x10
+#define DRM_CAP_PAGE_FLIP_TARGET	0x11
 
 /** DRM_IOCTL_GET_CAP ioctl argument type */
 struct drm_get_cap {
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 49a72659b801..df0e3504c349 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -520,7 +520,13 @@ struct drm_color_lut {
 
 #define DRM_MODE_PAGE_FLIP_EVENT 0x01
 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
-#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT|DRM_MODE_PAGE_FLIP_ASYNC)
+#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
+#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8
+#define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \
+				   DRM_MODE_PAGE_FLIP_TARGET_RELATIVE)
+#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \
+				  DRM_MODE_PAGE_FLIP_ASYNC | \
+				  DRM_MODE_PAGE_FLIP_TARGET)
 
 /*
  * Request a page flip on the specified crtc.
@@ -543,8 +549,7 @@ struct drm_color_lut {
  * 'as soon as possible', meaning that it not delay waiting for vblank.
  * This may cause tearing on the screen.
  *
- * The reserved field must be zero until we figure out something
- * clever to use it for.
+ * The reserved field must be zero.
  */
 
 struct drm_mode_crtc_page_flip {
@@ -555,6 +560,34 @@ struct drm_mode_crtc_page_flip {
 	__u64 user_data;
 };
 
+/*
+ * Request a page flip on the specified crtc.
+ *
+ * Same as struct drm_mode_crtc_page_flip, but supports new flags and
+ * re-purposes the reserved field:
+ *
+ * The sequence field must be zero unless either of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When
+ * the ABSOLUTE flag is specified, the sequence field denotes the absolute
+ * vblank sequence when the flip should take effect. When the RELATIVE
+ * flag is specified, the sequence field denotes the relative (to the
+ * current one when the ioctl is called) vblank sequence when the flip
+ * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to
+ * make sure the vblank sequence before the target one has passed before
+ * calling this ioctl. The purpose of the
+ * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify
+ * the target for when code dealing with a page flip runs during a
+ * vertical blank period.
+ */
+
+struct drm_mode_crtc_page_flip_target {
+	__u32 crtc_id;
+	__u32 fb_id;
+	__u32 flags;
+	__u32 sequence;
+	__u64 user_data;
+};
+
 /* create a dumb scanout buffer */
 struct drm_mode_create_dumb {
 	__u32 height;
-- 
cgit v1.2.3


From ab10dccb11608b96b43b557c12a5ad867723e503 Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Tue, 9 Aug 2016 12:38:24 +0800
Subject: rps: Inspect PPTP encapsulated by GRE to get flow hash

The PPTP is encapsulated by GRE header with that GRE_VERSION bits
must contain one. But current GRE RPS needs the GRE_VERSION must be
zero. So RPS does not work for PPTP traffic.

In my test environment, there are four MIPS cores, and all traffic
are passed through by PPTP. As a result, only one core is 100% busy
while other three cores are very idle. After this patch, the usage
of four cores are balanced well.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Reviewed-by: Philip Prindeville <philipp@redfish-solutions.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c         |  36 +------------
 include/net/gre.h              |  10 +++-
 include/net/pptp.h             |  40 +++++++++++++++
 include/uapi/linux/if_tunnel.h |   7 ++-
 net/core/flow_dissector.c      | 113 ++++++++++++++++++++++++++++-------------
 5 files changed, 135 insertions(+), 71 deletions(-)
 create mode 100644 include/net/pptp.h

(limited to 'include/uapi')

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905ed4a32..3e68dbc0af7f 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/gre.h>
+#include <net/pptp.h>
 
 #include <linux/uaccess.h>
 
@@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly;
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS	(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-	((((curseq) & 0xffffff00) == 0) &&\
-	(((lastseq) & 0xffffff00) == 0xffffff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER    0x1
-
-#define PPTP_GRE_FLAG_C	0x80
-#define PPTP_GRE_FLAG_R	0x40
-#define PPTP_GRE_FLAG_K	0x20
-#define PPTP_GRE_FLAG_S	0x10
-#define PPTP_GRE_FLAG_A	0x80
-
-#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-	u8  flags;
-	u8  ver;
-	__be16 protocol;
-	__be16 payload_len;
-	__be16 call_id;
-	__be32 seq;
-	__be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
 	struct pppox_sock *sock;
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31d1d4c..8962e1e68449 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,7 +7,15 @@
 struct gre_base_hdr {
 	__be16 flags;
 	__be16 protocol;
-};
+} __packed;
+
+struct gre_full_hdr {
+	struct gre_base_hdr fixed_header;
+	__be16 csum;
+	__be16 reserved1;
+	__be32 key;
+	__be32 seq;
+} __packed;
 #define GRE_HEADER_SECTION 4
 
 #define GREPROTO_CISCO		0
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 000000000000..301d3e2ba1f9
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,40 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS     (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+	((((curseq) & 0xffffff00) == 0) &&\
+	(((lastseq) & 0xffffff00) == 0xffffff00))
+
+#define PPTP_GRE_PROTO  0x880B
+#define PPTP_GRE_VER    0x1
+
+#define PPTP_GRE_FLAG_C 0x80
+#define PPTP_GRE_FLAG_R 0x40
+#define PPTP_GRE_FLAG_K 0x20
+#define PPTP_GRE_FLAG_S 0x10
+#define PPTP_GRE_FLAG_A 0x80
+
+#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
+#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
+#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
+#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
+#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+	u8  flags;
+	u8  ver;
+	__be16 protocol;
+	__be16 payload_len;
+	__be16 call_id;
+	__be32 seq;
+	__be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f5515174..60dbb200de60 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -24,9 +24,14 @@
 #define GRE_SEQ		__cpu_to_be16(0x1000)
 #define GRE_STRICT	__cpu_to_be16(0x0800)
 #define GRE_REC		__cpu_to_be16(0x0700)
-#define GRE_FLAGS	__cpu_to_be16(0x00F8)
+#define GRE_ACK		__cpu_to_be16(0x0080)
+#define GRE_FLAGS	__cpu_to_be16(0x0078)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
+#define GRE_VERSION_1	__cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP	__cpu_to_be16(0x880b)
+#define GRE_PPTP_KEY_MASK	__cpu_to_be32(0xffff)
+
 struct ip_tunnel_parm {
 	char			name[IFNAMSIZ];
 	int			link;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f61c5e..91028ae2fb01 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
 #include <linux/igmp.h>
 #include <linux/icmp.h>
 #include <linux/sctp.h>
@@ -338,32 +340,42 @@ mpls:
 ip_proto_again:
 	switch (ip_proto) {
 	case IPPROTO_GRE: {
-		struct gre_hdr {
-			__be16 flags;
-			__be16 proto;
-		} *hdr, _hdr;
+		struct gre_base_hdr *hdr, _hdr;
+		u16 gre_ver;
+		int offset = 0;
 
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			goto out_bad;
-		/*
-		 * Only look inside GRE if version zero and no
-		 * routing
-		 */
-		if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+		/* Only look inside GRE without routing */
+		if (hdr->flags & GRE_ROUTING)
 			break;
 
-		proto = hdr->proto;
-		nhoff += 4;
+		/* Only look inside GRE for version 0 and 1 */
+		gre_ver = ntohs(hdr->flags & GRE_VERSION);
+		if (gre_ver > 1)
+			break;
+
+		proto = hdr->protocol;
+		if (gre_ver) {
+			/* Version1 must be PPTP, and check the flags */
+			if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+				break;
+		}
+
+		offset += sizeof(struct gre_base_hdr);
+
 		if (hdr->flags & GRE_CSUM)
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+				  sizeof(((struct gre_full_hdr *)0)->reserved1);
+
 		if (hdr->flags & GRE_KEY) {
 			const __be32 *keyid;
 			__be32 _keyid;
 
-			keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+			keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
 						     data, hlen, &_keyid);
-
 			if (!keyid)
 				goto out_bad;
 
@@ -372,32 +384,65 @@ ip_proto_again:
 				key_keyid = skb_flow_dissector_target(flow_dissector,
 								      FLOW_DISSECTOR_KEY_GRE_KEYID,
 								      target_container);
-				key_keyid->keyid = *keyid;
+				if (gre_ver == 0)
+					key_keyid->keyid = *keyid;
+				else
+					key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
 			}
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->key);
 		}
+
 		if (hdr->flags & GRE_SEQ)
-			nhoff += 4;
-		if (proto == htons(ETH_P_TEB)) {
-			const struct ethhdr *eth;
-			struct ethhdr _eth;
-
-			eth = __skb_header_pointer(skb, nhoff,
-						   sizeof(_eth),
-						   data, hlen, &_eth);
-			if (!eth)
+			offset += sizeof(((struct pptp_gre_header *)0)->seq);
+
+		if (gre_ver == 0) {
+			if (proto == htons(ETH_P_TEB)) {
+				const struct ethhdr *eth;
+				struct ethhdr _eth;
+
+				eth = __skb_header_pointer(skb, nhoff + offset,
+							   sizeof(_eth),
+							   data, hlen, &_eth);
+				if (!eth)
+					goto out_bad;
+				proto = eth->h_proto;
+				offset += sizeof(*eth);
+
+				/* Cap headers that we access via pointers at the
+				 * end of the Ethernet header as our maximum alignment
+				 * at that point is only 2 bytes.
+				 */
+				if (NET_IP_ALIGN)
+					hlen = (nhoff + offset);
+			}
+		} else { /* version 1, must be PPTP */
+			u8 _ppp_hdr[PPP_HDRLEN];
+			u8 *ppp_hdr;
+
+			if (hdr->flags & GRE_ACK)
+				offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+			ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+						     sizeof(_ppp_hdr), _ppp_hdr);
+			if (!ppp_hdr)
 				goto out_bad;
-			proto = eth->h_proto;
-			nhoff += sizeof(*eth);
-
-			/* Cap headers that we access via pointers at the
-			 * end of the Ethernet header as our maximum alignment
-			 * at that point is only 2 bytes.
-			 */
-			if (NET_IP_ALIGN)
-				hlen = nhoff;
+
+			switch (PPP_PROTOCOL(ppp_hdr)) {
+			case PPP_IP:
+				proto = htons(ETH_P_IP);
+				break;
+			case PPP_IPV6:
+				proto = htons(ETH_P_IPV6);
+				break;
+			default:
+				/* Could probably catch some more like MPLS */
+				break;
+			}
+
+			offset += PPP_HDRLEN;
 		}
 
+		nhoff += offset;
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 			goto out_good;
-- 
cgit v1.2.3


From cb1b69b0b15b2897daeba8674c14c85a23a3347f Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Thu, 11 Aug 2016 18:02:07 +0200
Subject: netfilter: nf_tables: add hash expression

This patch adds a new hash expression, this provides jhash support but
this can be extended to support for other hash functions. The modulus
and seed already comes embedded into this new expression.

Use case example:

	... meta mark set hash ip saddr mod 10

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  20 +++++
 net/netfilter/Kconfig                    |   6 ++
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_hash.c                 | 136 +++++++++++++++++++++++++++++++
 4 files changed, 163 insertions(+)
 create mode 100644 net/netfilter/nft_hash.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 01751faccaf8..6ce0a6dd0889 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -723,6 +723,26 @@ enum nft_meta_keys {
 	NFT_META_PRANDOM,
 };
 
+/**
+ * enum nft_hash_attributes - nf_tables hash expression netlink attributes
+ *
+ * @NFTA_HASH_SREG: source register (NLA_U32)
+ * @NFTA_HASH_DREG: destination register (NLA_U32)
+ * @NFTA_HASH_LEN: source data length (NLA_U32)
+ * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
+ * @NFTA_HASH_SEED: seed value (NLA_U32)
+ */
+enum nft_hash_attributes {
+	NFTA_HASH_UNSPEC,
+	NFTA_HASH_SREG,
+	NFTA_HASH_DREG,
+	NFTA_HASH_LEN,
+	NFTA_HASH_MODULUS,
+	NFTA_HASH_SEED,
+	__NFTA_HASH_MAX,
+};
+#define NFTA_HASH_MAX	(__NFTA_HASH_MAX - 1)
+
 /**
  * enum nft_meta_attributes - nf_tables meta expression netlink attributes
  *
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e5740e108a0b..9cfaa00c79b2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -563,6 +563,12 @@ config NFT_COMPAT
 	  x_tables match/target extensions over the nf_tables
 	  framework.
 
+config NFT_HASH
+	tristate "Netfilter nf_tables hash module"
+	help
+	  This option adds the "hash" expression that you can use to perform
+	  a hash operation on registers.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 101fb859203c..1106ccde215c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
 obj-$(CONFIG_NFT_REDIR)		+= nft_redir.o
+obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000000..b82ff29b3f5f
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
+
+struct nft_hash {
+	enum nft_registers      sreg:8;
+	enum nft_registers      dreg:8;
+	u8			len;
+	u32			modulus;
+	u32			seed;
+};
+
+static void nft_hash_eval(const struct nft_expr *expr,
+			  struct nft_regs *regs,
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	const void *data = &regs->data[priv->sreg];
+
+	regs->data[priv->dreg] =
+		reciprocal_scale(jhash(data, priv->len, priv->seed),
+				 priv->modulus);
+}
+
+const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_LEN]		= { .type = NLA_U32 },
+	[NFTA_HASH_MODULUS]	= { .type = NLA_U32 },
+	[NFTA_HASH_SEED]	= { .type = NLA_U32 },
+};
+
+static int nft_hash_init(const struct nft_ctx *ctx,
+			 const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	u32 len;
+
+	if (!tb[NFTA_HASH_SREG] ||
+	    !tb[NFTA_HASH_DREG] ||
+	    !tb[NFTA_HASH_LEN]  ||
+	    !tb[NFTA_HASH_SEED] ||
+	    !tb[NFTA_HASH_MODULUS])
+		return -EINVAL;
+
+	priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+	priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+	len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+	if (len == 0 || len > U8_MAX)
+		return -ERANGE;
+
+	priv->len = len;
+
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+	if (priv->modulus <= 1)
+		return -ERANGE;
+
+	priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+	return nft_validate_register_load(priv->sreg, len) &&
+	       nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_hash_dump(struct sk_buff *skb,
+			 const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_HASH_LEN, priv->len))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_HASH_MODULUS, priv->modulus))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_HASH_SEED, priv->seed))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_hash_ops = {
+	.type		= &nft_hash_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+	.eval		= nft_hash_eval,
+	.init		= nft_hash_init,
+	.dump		= nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+	.name		= "hash",
+	.ops		= &nft_hash_ops,
+	.policy		= nft_hash_policy,
+	.maxattr	= NFTA_HASH_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+	return nft_register_expr(&nft_hash_type);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_expr(&nft_hash_type);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
-- 
cgit v1.2.3


From 300d8b93d73533411a98362350fe7d1795f1a044 Mon Sep 17 00:00:00 2001
From: Appana Durga Kedareswara Rao <appana.durga.rao@xilinx.com>
Date: Wed, 10 Aug 2016 11:20:06 +0530
Subject: net: Add mask for Control register 10Mbps speed

This patch adds mask for the Control register
10Mbps speed.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Kedareswara rao Appana <appanad@xilinx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mii.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 237fac4bc17b..15d8510cdae0 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -48,6 +48,7 @@
 #define BMCR_SPEED100		0x2000	/* Select 100Mbps              */
 #define BMCR_LOOPBACK		0x4000	/* TXD loopback bits           */
 #define BMCR_RESET		0x8000	/* Reset to default state      */
+#define BMCR_SPEED10		0x0000	/* Select 10Mbps               */
 
 /* Basic mode status register. */
 #define BMSR_ERCAP		0x0001	/* Ext-reg capability          */
-- 
cgit v1.2.3


From 60d20f9195b260bdf0ac10c275ae9f6016f9c069 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Fri, 12 Aug 2016 08:56:52 -0700
Subject: bpf: Add bpf_current_task_under_cgroup helper

This adds a bpf helper that's similar to the skb_in_cgroup helper to check
whether the probe is currently executing in the context of a specific
subset of the cgroupsv2 hierarchy. It does this based on membership test
for a cgroup arraymap. It is invalid to call this in an interrupt, and
it'll return an error. The helper is primarily to be used in debugging
activities for containers, where you may have multiple programs running in
a given top-level "container".

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 11 +++++++++++
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/verifier.c    |  4 +++-
 kernel/trace/bpf_trace.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fec6056..bea0c4e2830a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69ac199..d504722ebfa4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_under_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..6b794d6669a7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	if (unlikely(idx >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func           = bpf_current_task_under_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From 03459345bc00da70a35fa39bcfcf13d779097074 Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Sat, 13 Aug 2016 00:30:48 +0800
Subject: pptp: Refactor the struct and macros of PPTP codes

1. Use struct gre_base_hdr directly in pptp_gre_header instead of
duplicated members;
2. Use existing macros like GRE_KEY, GRE_SEQ, and so on instead of
duplicated macros defined by PPTP;
3. Add new macros like GRE_IS_ACK/SEQ and so on instead of
PPTP_GRE_IS_A/S and so on;

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Reviewed-by: Philip Prindeville <philipp@redfish-solutions.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c         | 28 +++++++++++++---------------
 include/net/pptp.h             | 19 +------------------
 include/uapi/linux/if_tunnel.h | 12 ++++++++++--
 3 files changed, 24 insertions(+), 35 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 3e68dbc0af7f..1951b1085cb8 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -206,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	skb_push(skb, header_len);
 	hdr = (struct pptp_gre_header *)(skb->data);
 
-	hdr->flags       = PPTP_GRE_FLAG_K;
-	hdr->ver         = PPTP_GRE_VER;
-	hdr->protocol    = htons(PPTP_GRE_PROTO);
-	hdr->call_id     = htons(opt->dst_addr.call_id);
+	hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ;
+	hdr->gre_hd.protocol = GRE_PROTO_PPP;
+	hdr->call_id = htons(opt->dst_addr.call_id);
 
-	hdr->flags      |= PPTP_GRE_FLAG_S;
-	hdr->seq         = htonl(++opt->seq_sent);
+	hdr->seq = htonl(++opt->seq_sent);
 	if (opt->ack_sent != seq_recv)	{
 		/* send ack with this message */
-		hdr->ver |= PPTP_GRE_FLAG_A;
+		hdr->gre_hd.flags |= GRE_ACK;
 		hdr->ack  = htonl(seq_recv);
 		opt->ack_sent = seq_recv;
 	}
@@ -278,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
 	headersize  = sizeof(*header);
 
 	/* test if acknowledgement present */
-	if (PPTP_GRE_IS_A(header->ver)) {
+	if (GRE_IS_ACK(header->gre_hd.flags)) {
 		__u32 ack;
 
 		if (!pskb_may_pull(skb, headersize))
@@ -286,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
 		header = (struct pptp_gre_header *)(skb->data);
 
 		/* ack in different place if S = 0 */
-		ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq;
+		ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq;
 
 		ack = ntohl(ack);
 
@@ -299,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb)
 		headersize -= sizeof(header->ack);
 	}
 	/* test if payload present */
-	if (!PPTP_GRE_IS_S(header->flags))
+	if (!GRE_IS_SEQ(header->gre_hd.flags))
 		goto drop;
 
 	payload_len = ntohs(header->payload_len);
@@ -360,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb)
 
 	header = (struct pptp_gre_header *)skb->data;
 
-	if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */
-		PPTP_GRE_IS_C(header->flags) ||                /* flag C should be clear */
-		PPTP_GRE_IS_R(header->flags) ||                /* flag R should be clear */
-		!PPTP_GRE_IS_K(header->flags) ||               /* flag K should be set */
-		(header->flags&0xF) != 0)                      /* routing and recursion ctrl = 0 */
+	if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */
+		GRE_IS_CSUM(header->gre_hd.flags) ||    /* flag CSUM should be clear */
+		GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */
+		!GRE_IS_KEY(header->gre_hd.flags) ||    /* flag KEY should be set */
+		(header->gre_hd.flags & GRE_FLAGS))     /* flag Recursion Ctrl should be clear */
 		/* if invalid, discard this packet */
 		goto drop;
 
diff --git a/include/net/pptp.h b/include/net/pptp.h
index 301d3e2ba1f9..92e9f1fe2628 100644
--- a/include/net/pptp.h
+++ b/include/net/pptp.h
@@ -10,26 +10,9 @@
 	((((curseq) & 0xffffff00) == 0) &&\
 	(((lastseq) & 0xffffff00) == 0xffffff00))
 
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER    0x1
-
-#define PPTP_GRE_FLAG_C 0x80
-#define PPTP_GRE_FLAG_R 0x40
-#define PPTP_GRE_FLAG_K 0x20
-#define PPTP_GRE_FLAG_S 0x10
-#define PPTP_GRE_FLAG_A 0x80
-
-#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
-
 #define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
 struct pptp_gre_header {
-	u8  flags;
-	u8  ver;
-	__be16 protocol;
+	struct gre_base_hdr gre_hd;
 	__be16 payload_len;
 	__be16 call_id;
 	__be32 seq;
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 60dbb200de60..361b9f0f20d7 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -28,8 +28,16 @@
 #define GRE_FLAGS	__cpu_to_be16(0x0078)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
-#define GRE_VERSION_1	__cpu_to_be16(0x0001)
-#define GRE_PROTO_PPP	__cpu_to_be16(0x880b)
+#define GRE_IS_CSUM(f)		((f) & GRE_CSUM)
+#define GRE_IS_ROUTING(f)	((f) & GRE_ROUTING)
+#define GRE_IS_KEY(f)		((f) & GRE_KEY)
+#define GRE_IS_SEQ(f)		((f) & GRE_SEQ)
+#define GRE_IS_STRICT(f)	((f) & GRE_STRICT)
+#define GRE_IS_REC(f)		((f) & GRE_REC)
+#define GRE_IS_ACK(f)		((f) & GRE_ACK)
+
+#define GRE_VERSION_1		__cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP		__cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK	__cpu_to_be32(0xffff)
 
 struct ip_tunnel_parm {
-- 
cgit v1.2.3


From 02486c2905a7caa50b0f508a86e03d12d8d24ac4 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 11 Aug 2016 17:36:20 -0700
Subject: libnvdimm: fix SMART Health DSM payload definition

"NVDIMM DSM Interface Example" v1.2 made an incompatible change to the
layout of function1 "SMART and Health Info".  While the kernel does not
directly consume this payload, it does define it in ndctl.h that
userpace utilities consume.

Reported-by: Brian Boylston <brian.boylston@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/uapi/linux/ndctl.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index ba5a8c79652a..ede5c6a62164 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -21,14 +21,16 @@ struct nd_cmd_smart {
 } __packed;
 
 #define ND_SMART_HEALTH_VALID	(1 << 0)
-#define ND_SMART_TEMP_VALID 	(1 << 1)
-#define ND_SMART_SPARES_VALID	(1 << 2)
-#define ND_SMART_ALARM_VALID	(1 << 3)
-#define ND_SMART_USED_VALID	(1 << 4)
-#define ND_SMART_SHUTDOWN_VALID	(1 << 5)
-#define ND_SMART_VENDOR_VALID	(1 << 6)
-#define ND_SMART_TEMP_TRIP	(1 << 0)
-#define ND_SMART_SPARE_TRIP	(1 << 1)
+#define ND_SMART_SPARES_VALID	(1 << 1)
+#define ND_SMART_USED_VALID	(1 << 2)
+#define ND_SMART_TEMP_VALID 	(1 << 3)
+#define ND_SMART_CTEMP_VALID 	(1 << 4)
+#define ND_SMART_ALARM_VALID	(1 << 9)
+#define ND_SMART_SHUTDOWN_VALID	(1 << 10)
+#define ND_SMART_VENDOR_VALID	(1 << 11)
+#define ND_SMART_SPARE_TRIP	(1 << 0)
+#define ND_SMART_TEMP_TRIP	(1 << 1)
+#define ND_SMART_CTEMP_TRIP	(1 << 2)
 #define ND_SMART_NON_CRITICAL_HEALTH	(1 << 0)
 #define ND_SMART_CRITICAL_HEALTH	(1 << 1)
 #define ND_SMART_FATAL_HEALTH		(1 << 2)
@@ -37,14 +39,15 @@ struct nd_smart_payload {
 	__u32 flags;
 	__u8 reserved0[4];
 	__u8 health;
-	__u16 temperature;
 	__u8 spares;
-	__u8 alarm_flags;
 	__u8 life_used;
+	__u8 alarm_flags;
+	__u16 temperature;
+	__u16 ctrl_temperature;
+	__u8 reserved1[15];
 	__u8 shutdown_state;
-	__u8 reserved1;
 	__u32 vendor_size;
-	__u8 vendor_data[108];
+	__u8 vendor_data[92];
 } __packed;
 
 struct nd_cmd_smart_threshold {
@@ -53,7 +56,8 @@ struct nd_cmd_smart_threshold {
 } __packed;
 
 struct nd_smart_threshold_payload {
-	__u16 alarm_control;
+	__u8 alarm_control;
+	__u8 reserved0;
 	__u16 temperature;
 	__u8 spares;
 	__u8 reserved[3];
-- 
cgit v1.2.3


From 9bb04a0c4e261187be904d05c2bcd1da0eebc20c Mon Sep 17 00:00:00 2001
From: Jonathan Yong <jonathan.yong@intel.com>
Date: Sat, 11 Jun 2016 14:13:38 -0500
Subject: PCI: Add Precision Time Measurement (PTM) support

Add Precision Time Measurement (PTM) support (see PCIe r3.1, sec 6.22).

Enable PTM on PTM Root devices and switch ports.  This does not enable PTM
on endpoints.

There currently are no PTM-capable devices on the market, but it is
expected to be supported by the Intel Apollo Lake platform.

[bhelgaas: complete rework]
Signed-off-by: Jonathan Yong <jonathan.yong@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h             |  6 ++++
 drivers/pci/pcie/Kconfig      | 11 +++++++
 drivers/pci/pcie/Makefile     |  1 +
 drivers/pci/pcie/ptm.c        | 70 +++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c           |  3 ++
 include/linux/pci.h           |  5 ++++
 include/uapi/linux/pci_regs.h | 10 ++++++-
 7 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 drivers/pci/pcie/ptm.c

(limited to 'include/uapi')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9730c474b016..194521bfb1a3 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -332,6 +332,12 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
 
 void pci_enable_acs(struct pci_dev *dev);
 
+#ifdef CONFIG_PCIE_PTM
+void pci_ptm_init(struct pci_dev *dev);
+#else
+static inline void pci_ptm_init(struct pci_dev *dev) { }
+#endif
+
 struct pci_dev_reset_methods {
 	u16 vendor;
 	u16 device;
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 7fcea75afa4c..7ce77635e5ad 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -92,3 +92,14 @@ config PCIE_DPC
 	  will be handled by the DPC driver.  If your system doesn't
 	  have this capability or you do not want to use this feature,
 	  it is safe to answer N.
+
+config PCIE_PTM
+	bool "PCIe Precision Time Measurement support"
+	default n
+	depends on PCIEPORTBUS
+	help
+	  This enables PCI Express Precision Time Measurement (PTM)
+	  support.
+
+	  This is only useful if you have devices that support PTM, but it
+	  is safe to enable even if you don't.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index b24525b3dec1..36e35ea8fde7 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_PCIEAER)		+= aer/
 obj-$(CONFIG_PCIE_PME) += pme.o
 
 obj-$(CONFIG_PCIE_DPC) += pcie-dpc.o
+obj-$(CONFIG_PCIE_PTM) += ptm.o
diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
new file mode 100644
index 000000000000..48eea4e65247
--- /dev/null
+++ b/drivers/pci/pcie/ptm.c
@@ -0,0 +1,70 @@
+/*
+ * PCI Express Precision Time Measurement
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include "../pci.h"
+
+static void pci_ptm_info(struct pci_dev *dev)
+{
+	dev_info(&dev->dev, "PTM enabled%s\n", dev->ptm_root ? " (root)" : "");
+}
+
+void pci_ptm_init(struct pci_dev *dev)
+{
+	int pos;
+	u32 cap, ctrl;
+	struct pci_dev *ups;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM);
+	if (!pos)
+		return;
+
+	/*
+	 * Enable PTM only on interior devices (root ports, switch ports,
+	 * etc.) on the assumption that it causes no link traffic until an
+	 * endpoint enables it.
+	 */
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT ||
+	     pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END))
+		return;
+
+	pci_read_config_dword(dev, pos + PCI_PTM_CAP, &cap);
+
+	/*
+	 * There's no point in enabling PTM unless it's enabled in the
+	 * upstream device or this device can be a PTM Root itself.  Per
+	 * the spec recommendation (PCIe r3.1, sec 7.32.3), select the
+	 * furthest upstream Time Source as the PTM Root.
+	 */
+	ups = pci_upstream_bridge(dev);
+	if (ups && ups->ptm_enabled) {
+		ctrl = PCI_PTM_CTRL_ENABLE;
+	} else {
+		if (cap & PCI_PTM_CAP_ROOT) {
+			ctrl = PCI_PTM_CTRL_ENABLE | PCI_PTM_CTRL_ROOT;
+			dev->ptm_root = 1;
+		} else
+			return;
+	}
+
+	pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl);
+	dev->ptm_enabled = 1;
+
+	pci_ptm_info(dev);
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 93f280df3428..e2e424472058 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1667,6 +1667,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	pci_enable_acs(dev);
 
 	pci_cleanup_aer_error_status_regs(dev);
+
+	/* Precision Time Measurement */
+	pci_ptm_init(dev);
 }
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2599a980340f..96c509fa9d46 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -367,6 +367,11 @@ struct pci_dev {
 	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
+
+#ifdef CONFIG_PCIE_PTM
+	unsigned int	ptm_root:1;
+	unsigned int	ptm_enabled:1;
+#endif
 #ifdef CONFIG_PCI_MSI
 	const struct attribute_group **msi_irq_groups;
 #endif
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 404095124ae2..926fff41b417 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -671,7 +671,8 @@
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
+#define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -964,4 +965,11 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+/* Precision Time Measurement */
+#define PCI_PTM_CAP			0x04	    /* PTM Capability */
+#define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
+#define PCI_PTM_CTRL			0x08	    /* PTM Control */
+#define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
+#define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
+
 #endif /* LINUX_PCI_REGS_H */
-- 
cgit v1.2.3


From 1255501d8681775d564de45742c6e82b7782b7f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 16 Aug 2016 09:50:40 +0100
Subject: drm/i915: Embrace the race in busy-ioctl

Daniel Vetter proposed a new challenge to the serialisation inside the
busy-ioctl that exposed a flaw that could result in us reporting the
wrong engine as being busy. If the request is reallocated as we test
its busyness and then reassigned to this object by another thread, we
would not notice that the test itself was incorrect.

We are faced with a choice of using __i915_gem_active_get_request_rcu()
to first acquire a reference to the request preventing the race, or to
acknowledge the race and accept the limitations upon the accuracy of the
busy flags. Note that we guarantee that we never falsely report the
object as idle (providing userspace itself doesn't race), and so the
most important use of the busy-ioctl and its guarantees are fulfilled.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471337440-16777-1-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 112 +++++++++++++++++++++++-----------------
 include/uapi/drm/i915_drm.h     |  16 +++++-
 2 files changed, 81 insertions(+), 47 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e08c774a1aa..a8d0f70c22f9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3807,49 +3807,68 @@ static __always_inline unsigned int
 __busy_set_if_active(const struct i915_gem_active *active,
 		     unsigned int (*flag)(unsigned int id))
 {
-	/* For more discussion about the barriers and locking concerns,
-	 * see __i915_gem_active_get_rcu().
-	 */
-	do {
-		struct drm_i915_gem_request *request;
-		unsigned int id;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_gem_request_completed(request))
-			return 0;
+	struct drm_i915_gem_request *request;
 
-		id = request->engine->exec_id;
+	request = rcu_dereference(active->request);
+	if (!request || i915_gem_request_completed(request))
+		return 0;
 
-		/* Check that the pointer wasn't reassigned and overwritten.
-		 *
-		 * In __i915_gem_active_get_rcu(), we enforce ordering between
-		 * the first rcu pointer dereference (imposing a
-		 * read-dependency only on access through the pointer) and
-		 * the second lockless access through the memory barrier
-		 * following a successful atomic_inc_not_zero(). Here there
-		 * is no such barrier, and so we must manually insert an
-		 * explicit read barrier to ensure that the following
-		 * access occurs after all the loads through the first
-		 * pointer.
-		 *
-		 * It is worth comparing this sequence with
-		 * raw_write_seqcount_latch() which operates very similarly.
-		 * The challenge here is the visibility of the other CPU
-		 * writes to the reallocated request vs the local CPU ordering.
-		 * Before the other CPU can overwrite the request, it will
-		 * have updated our active->request and gone through a wmb.
-		 * During the read here, we want to make sure that the values
-		 * we see have not been overwritten as we do so - and we do
-		 * that by serialising the second pointer check with the writes
-		 * on other other CPUs.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		smp_rmb();
-		if (request == rcu_access_pointer(active->request))
-			return flag(id);
-	} while (1);
+	/* This is racy. See __i915_gem_active_get_rcu() for an in detail
+	 * discussion of how to handle the race correctly, but for reporting
+	 * the busy state we err on the side of potentially reporting the
+	 * wrong engine as being busy (but we guarantee that the result
+	 * is at least self-consistent).
+	 *
+	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
+	 * whilst we are inspecting it, even under the RCU read lock as we are.
+	 * This means that there is a small window for the engine and/or the
+	 * seqno to have been overwritten. The seqno will always be in the
+	 * future compared to the intended, and so we know that if that
+	 * seqno is idle (on whatever engine) our request is idle and the
+	 * return 0 above is correct.
+	 *
+	 * The issue is that if the engine is switched, it is just as likely
+	 * to report that it is busy (but since the switch happened, we know
+	 * the request should be idle). So there is a small chance that a busy
+	 * result is actually the wrong engine.
+	 *
+	 * So why don't we care?
+	 *
+	 * For starters, the busy ioctl is a heuristic that is by definition
+	 * racy. Even with perfect serialisation in the driver, the hardware
+	 * state is constantly advancing - the state we report to the user
+	 * is stale.
+	 *
+	 * The critical information for the busy-ioctl is whether the object
+	 * is idle as userspace relies on that to detect whether its next
+	 * access will stall, or if it has missed submitting commands to
+	 * the hardware allowing the GPU to stall. We never generate a
+	 * false-positive for idleness, thus busy-ioctl is reliable at the
+	 * most fundamental level, and we maintain the guarantee that a
+	 * busy object left to itself will eventually become idle (and stay
+	 * idle!).
+	 *
+	 * We allow ourselves the leeway of potentially misreporting the busy
+	 * state because that is an optimisation heuristic that is constantly
+	 * in flux. Being quickly able to detect the busy/idle state is much
+	 * more important than accurate logging of exactly which engines were
+	 * busy.
+	 *
+	 * For accuracy in reporting the engine, we could use
+	 *
+	 *	result = 0;
+	 *	request = __i915_gem_active_get_rcu(active);
+	 *	if (request) {
+	 *		if (!i915_gem_request_completed(request))
+	 *			result = flag(request->engine->exec_id);
+	 *		i915_gem_request_put(request);
+	 *	}
+	 *
+	 * but that still remains susceptible to both hardware and userspace
+	 * races. So we accept making the result of that race slightly worse,
+	 * given the rarity of the race and its low impact on the result.
+	 */
+	return flag(READ_ONCE(request->engine->exec_id));
 }
 
 static __always_inline unsigned int
@@ -3897,11 +3916,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		 * retired and freed. We take a local copy of the pointer,
 		 * but before we add its engine into the busy set, the other
 		 * thread reallocates it and assigns it to a task on another
-		 * engine with a fresh and incomplete seqno.
-		 *
-		 * So after we lookup the engine's id, we double check that
-		 * the active request is the same and only then do we add it
-		 * into the busy set.
+		 * engine with a fresh and incomplete seqno. Guarding against
+		 * that requires careful serialisation and reference counting,
+		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
+		 * instead we expect that if the result is busy, which engines
+		 * are busy is not completely reliable - we only guarantee
+		 * that the object was busy.
 		 */
 		rcu_read_lock();
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a57..5501fe83ed92 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -855,7 +855,16 @@ struct drm_i915_gem_busy {
 	 * having flushed any pending activity), and a non-zero return that
 	 * the object is still in-flight on the GPU. (The GPU has not yet
 	 * signaled completion for all pending requests that reference the
-	 * object.)
+	 * object.) An object is guaranteed to become idle eventually (so
+	 * long as no new GPU commands are executed upon it). Due to the
+	 * asynchronous nature of the hardware, an object reported
+	 * as busy may become idle before the ioctl is completed.
+	 *
+	 * Furthermore, if the object is busy, which engine is busy is only
+	 * provided as a guide. There are race conditions which prevent the
+	 * report of which engines are busy from being always accurate.
+	 * However, the converse is not true. If the object is idle, the
+	 * result of the ioctl, that all engines are idle, is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
 	 * the engines on which the object is being read, and the
@@ -878,6 +887,11 @@ struct drm_i915_gem_busy {
 	 * execution engines, e.g. multiple media engines, which are
 	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
 	 * so are not separately reported for busyness.
+	 *
+	 * Caveat emptor:
+	 * Only the boolean result of this query is reliable; that is whether
+	 * the object is idle or busy. The report of which engines are busy
+	 * should be only used as a heuristic.
 	 */
 	__u32 busy;
 };
-- 
cgit v1.2.3


From e7893c4bd34b9d2f942d77666656efaa084a3f87 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Tue, 26 Jul 2016 14:13:21 +0800
Subject: drm/amdgpu: add shadow bo support V2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

shadow bo is the shadow of a bo, which is always in GTT,
which can be used to backup the original bo.
V2:
reference shadow parent, shadow bo will be freed by who allocted him.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 48 ++++++++++++++++++++++++++++--
 include/uapi/drm/amdgpu_drm.h              |  2 ++
 3 files changed, 48 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c54f5b962b80..26f6028bb72b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -505,6 +505,7 @@ struct amdgpu_bo {
 	struct amdgpu_device		*adev;
 	struct drm_gem_object		gem_base;
 	struct amdgpu_bo		*parent;
+	struct amdgpu_bo		*shadow;
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	struct amdgpu_mn		*mn;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d8e69a7e51f9..278017d3dc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -380,6 +380,37 @@ fail_free:
 	return r;
 }
 
+static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+				   unsigned long size, int byte_align,
+				   struct amdgpu_bo *bo)
+{
+	struct ttm_placement placement = {0};
+	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+	int r;
+
+	if (bo->shadow)
+		return 0;
+
+	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
+	memset(&placements, 0,
+	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
+
+	amdgpu_ttm_placement_init(adev, &placement,
+				  placements, AMDGPU_GEM_DOMAIN_GTT,
+				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);
+
+	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
+					AMDGPU_GEM_DOMAIN_GTT,
+					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+					NULL, &placement,
+					bo->tbo.resv,
+					&bo->shadow);
+	if (!r)
+		bo->shadow->parent = amdgpu_bo_ref(bo);
+
+	return r;
+}
+
 int amdgpu_bo_create(struct amdgpu_device *adev,
 		     unsigned long size, int byte_align,
 		     bool kernel, u32 domain, u64 flags,
@@ -389,6 +420,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 {
 	struct ttm_placement placement = {0};
 	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+	int r;
 
 	memset(&placements, 0,
 	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
@@ -396,9 +428,19 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 	amdgpu_ttm_placement_init(adev, &placement,
 				  placements, domain, flags);
 
-	return amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
-					   domain, flags, sg, &placement,
-					   resv, bo_ptr);
+	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
+					domain, flags, sg, &placement,
+					resv, bo_ptr);
+	if (r)
+		return r;
+
+	if (flags & AMDGPU_GEM_CREATE_SHADOW) {
+		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
+		if (r)
+			amdgpu_bo_unref(bo_ptr);
+	}
+
+	return r;
 }
 
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index a902a602490b..5aef0b71079b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -79,6 +79,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
 /* Flag that the memory should be in VRAM and cleared */
 #define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
+/* Flag that create shadow bo(GTT) while allocating vram bo */
+#define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
cgit v1.2.3


From 0b28cb4bcb17dcb5fe0763fc3e1a94398b8f6cf6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Sun, 7 Aug 2016 02:25:36 -0700
Subject: HID: intel-ish-hid: ISH HID client driver

This driver is responsible for implementing ISH HID client, which
gets HID description and report. Once it has completely gets
report descriptors, it registers as a HID LL drivers. This implements
necessary callbacks so that it can be used by HID sensor hub driver.

Original-author: Daniel Drubin <daniel.drubin@intel.com>
Reviewed-and-tested-by: Ooi, Joyce <joyce.ooi@intel.com>
Tested-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Rann Bar-On <rb6@duke.edu>
Tested-by: Atri Bhattacharya <badshah400@aim.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/Makefile           |   4 +
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 978 +++++++++++++++++++++++++++
 drivers/hid/intel-ish-hid/ishtp-hid.c        | 246 +++++++
 drivers/hid/intel-ish-hid/ishtp-hid.h        | 182 +++++
 include/uapi/linux/input.h                   |   1 +
 5 files changed, 1411 insertions(+)
 create mode 100644 drivers/hid/intel-ish-hid/ishtp-hid-client.c
 create mode 100644 drivers/hid/intel-ish-hid/ishtp-hid.c
 create mode 100644 drivers/hid/intel-ish-hid/ishtp-hid.h

(limited to 'include/uapi')

diff --git a/drivers/hid/intel-ish-hid/Makefile b/drivers/hid/intel-ish-hid/Makefile
index ab626d8f7bb8..8c08b0b358b1 100644
--- a/drivers/hid/intel-ish-hid/Makefile
+++ b/drivers/hid/intel-ish-hid/Makefile
@@ -15,4 +15,8 @@ obj-$(CONFIG_INTEL_ISH_HID) += intel-ish-ipc.o
 intel-ish-ipc-objs := ipc/ipc.o
 intel-ish-ipc-objs += ipc/pci-ish.o
 
+obj-$(CONFIG_INTEL_ISH_HID) += intel-ishtp-hid.o
+intel-ishtp-hid-objs := ishtp-hid.o
+intel-ishtp-hid-objs += ishtp-hid-client.o
+
 ccflags-y += -Idrivers/hid/intel-ish-hid/ishtp
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
new file mode 100644
index 000000000000..5c643d7a07b2
--- /dev/null
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -0,0 +1,978 @@
+/*
+ * ISHTP client driver for HID (ISH)
+ *
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/hid.h>
+#include <linux/sched.h>
+#include "ishtp/ishtp-dev.h"
+#include "ishtp/client.h"
+#include "ishtp-hid.h"
+
+/* Rx ring buffer pool size */
+#define HID_CL_RX_RING_SIZE	32
+#define HID_CL_TX_RING_SIZE	16
+
+/**
+ * report_bad_packets() - Report bad packets
+ * @hid_ishtp_cl:	Client instance to get stats
+ * @recv_buf:		Raw received host interface message
+ * @cur_pos:		Current position index in payload
+ * @payload_len:	Length of payload expected
+ *
+ * Dumps error in case bad packet is received
+ */
+static void report_bad_packet(struct ishtp_cl *hid_ishtp_cl, void *recv_buf,
+			      size_t cur_pos,  size_t payload_len)
+{
+	struct hostif_msg *recv_msg = recv_buf;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+
+	dev_err(&client_data->cl_device->dev, "[hid-ish]: BAD packet %02X\n"
+		"total_bad=%u cur_pos=%u\n"
+		"[%02X %02X %02X %02X]\n"
+		"payload_len=%u\n"
+		"multi_packet_cnt=%u\n"
+		"is_response=%02X\n",
+		recv_msg->hdr.command, client_data->bad_recv_cnt,
+		(unsigned int)cur_pos,
+		((unsigned char *)recv_msg)[0], ((unsigned char *)recv_msg)[1],
+		((unsigned char *)recv_msg)[2], ((unsigned char *)recv_msg)[3],
+		(unsigned int)payload_len, client_data->multi_packet_cnt,
+		recv_msg->hdr.command & ~CMD_MASK);
+}
+
+/**
+ * process_recv() - Received and parse incoming packet
+ * @hid_ishtp_cl:	Client instance to get stats
+ * @recv_buf:		Raw received host interface message
+ * @data_len:		length of the message
+ *
+ * Parse the incoming packet. If it is a response packet then it will update
+ * per instance flags and wake up the caller waiting to for the response.
+ */
+static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf,
+			 size_t data_len)
+{
+	struct hostif_msg *recv_msg;
+	unsigned char *payload;
+	struct device_info *dev_info;
+	int i, j;
+	size_t	payload_len, total_len, cur_pos;
+	int report_type;
+	struct report_list *reports_list;
+	char *reports;
+	size_t report_len;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+	int curr_hid_dev = client_data->cur_hid_dev;
+
+	if (data_len < sizeof(struct hostif_msg_hdr)) {
+		dev_err(&client_data->cl_device->dev,
+			"[hid-ish]: error, received %u which is less than data header %u\n",
+			(unsigned int)data_len,
+			(unsigned int)sizeof(struct hostif_msg_hdr));
+		++client_data->bad_recv_cnt;
+		ish_hw_reset(hid_ishtp_cl->dev);
+		return;
+	}
+
+	payload = recv_buf + sizeof(struct hostif_msg_hdr);
+	total_len = data_len;
+	cur_pos = 0;
+
+	do {
+		recv_msg = (struct hostif_msg *)(recv_buf + cur_pos);
+		payload_len = recv_msg->hdr.size;
+
+		/* Sanity checks */
+		if (cur_pos + payload_len + sizeof(struct hostif_msg) >
+				total_len) {
+			++client_data->bad_recv_cnt;
+			report_bad_packet(hid_ishtp_cl, recv_msg, cur_pos,
+					  payload_len);
+			ish_hw_reset(hid_ishtp_cl->dev);
+			break;
+		}
+
+		hid_ishtp_trace(client_data,  "%s %d\n",
+				__func__, recv_msg->hdr.command & CMD_MASK);
+
+		switch (recv_msg->hdr.command & CMD_MASK) {
+		case HOSTIF_DM_ENUM_DEVICES:
+			if ((!(recv_msg->hdr.command & ~CMD_MASK) ||
+					client_data->init_done)) {
+				++client_data->bad_recv_cnt;
+				report_bad_packet(hid_ishtp_cl, recv_msg,
+						  cur_pos,
+						  payload_len);
+				ish_hw_reset(hid_ishtp_cl->dev);
+				break;
+			}
+			client_data->hid_dev_count = (unsigned int)*payload;
+			if (!client_data->hid_devices)
+				client_data->hid_devices = devm_kzalloc(
+						&client_data->cl_device->dev,
+						client_data->hid_dev_count *
+						sizeof(struct device_info),
+						GFP_KERNEL);
+			if (!client_data->hid_devices) {
+				dev_err(&client_data->cl_device->dev,
+				"Mem alloc failed for hid device info\n");
+				wake_up_interruptible(&client_data->init_wait);
+				break;
+			}
+			for (i = 0; i < client_data->hid_dev_count; ++i) {
+				if (1 + sizeof(struct device_info) * i >=
+						payload_len) {
+					dev_err(&client_data->cl_device->dev,
+						"[hid-ish]: [ENUM_DEVICES]: content size %lu is bigger than payload_len %u\n",
+						1 + sizeof(struct device_info)
+						* i,
+						(unsigned int)payload_len);
+				}
+
+				if (1 + sizeof(struct device_info) * i >=
+						data_len)
+					break;
+
+				dev_info = (struct device_info *)(payload + 1 +
+					sizeof(struct device_info) * i);
+				if (client_data->hid_devices)
+					memcpy(client_data->hid_devices + i,
+					       dev_info,
+					       sizeof(struct device_info));
+			}
+
+			client_data->enum_devices_done = true;
+			wake_up_interruptible(&client_data->init_wait);
+
+			break;
+
+		case HOSTIF_GET_HID_DESCRIPTOR:
+			if ((!(recv_msg->hdr.command & ~CMD_MASK) ||
+					client_data->init_done)) {
+				++client_data->bad_recv_cnt;
+				report_bad_packet(hid_ishtp_cl, recv_msg,
+						  cur_pos,
+						  payload_len);
+				ish_hw_reset(hid_ishtp_cl->dev);
+				break;
+			}
+			if (!client_data->hid_descr[curr_hid_dev])
+				client_data->hid_descr[curr_hid_dev] =
+				devm_kmalloc(&client_data->cl_device->dev,
+					     payload_len, GFP_KERNEL);
+			if (client_data->hid_descr[curr_hid_dev]) {
+				memcpy(client_data->hid_descr[curr_hid_dev],
+				       payload, payload_len);
+				client_data->hid_descr_size[curr_hid_dev] =
+					payload_len;
+				client_data->hid_descr_done = true;
+			}
+			wake_up_interruptible(&client_data->init_wait);
+
+			break;
+
+		case HOSTIF_GET_REPORT_DESCRIPTOR:
+			if ((!(recv_msg->hdr.command & ~CMD_MASK) ||
+					client_data->init_done)) {
+				++client_data->bad_recv_cnt;
+				report_bad_packet(hid_ishtp_cl, recv_msg,
+						  cur_pos,
+						  payload_len);
+				ish_hw_reset(hid_ishtp_cl->dev);
+				break;
+			}
+			if (!client_data->report_descr[curr_hid_dev])
+				client_data->report_descr[curr_hid_dev] =
+				devm_kmalloc(&client_data->cl_device->dev,
+					     payload_len, GFP_KERNEL);
+			if (client_data->report_descr[curr_hid_dev])  {
+				memcpy(client_data->report_descr[curr_hid_dev],
+				       payload,
+				       payload_len);
+				client_data->report_descr_size[curr_hid_dev] =
+					payload_len;
+				client_data->report_descr_done = true;
+			}
+			wake_up_interruptible(&client_data->init_wait);
+
+			break;
+
+		case HOSTIF_GET_FEATURE_REPORT:
+			report_type = HID_FEATURE_REPORT;
+			goto	do_get_report;
+
+		case HOSTIF_GET_INPUT_REPORT:
+			report_type = HID_INPUT_REPORT;
+do_get_report:
+			/* Get index of device that matches this id */
+			for (i = 0; i < client_data->num_hid_devices; ++i) {
+				if (recv_msg->hdr.device_id ==
+					client_data->hid_devices[i].dev_id)
+					if (client_data->hid_sensor_hubs[i]) {
+						hid_input_report(
+						client_data->hid_sensor_hubs[
+									i],
+						report_type, payload,
+						payload_len, 0);
+						ishtp_hid_wakeup(
+						client_data->hid_sensor_hubs[
+							i]);
+						break;
+					}
+			}
+			break;
+
+		case HOSTIF_SET_FEATURE_REPORT:
+			/* Get index of device that matches this id */
+			for (i = 0; i < client_data->num_hid_devices; ++i) {
+				if (recv_msg->hdr.device_id ==
+					client_data->hid_devices[i].dev_id)
+					if (client_data->hid_sensor_hubs[i]) {
+						ishtp_hid_wakeup(
+						client_data->hid_sensor_hubs[
+							i]);
+						break;
+					}
+			}
+			break;
+
+		case HOSTIF_PUBLISH_INPUT_REPORT:
+			report_type = HID_INPUT_REPORT;
+			for (i = 0; i < client_data->num_hid_devices; ++i)
+				if (recv_msg->hdr.device_id ==
+					client_data->hid_devices[i].dev_id)
+					if (client_data->hid_sensor_hubs[i])
+						hid_input_report(
+						client_data->hid_sensor_hubs[
+									i],
+						report_type, payload,
+						payload_len, 0);
+			break;
+
+		case HOSTIF_PUBLISH_INPUT_REPORT_LIST:
+			report_type = HID_INPUT_REPORT;
+			reports_list = (struct report_list *)payload;
+			reports = (char *)reports_list->reports;
+
+			for (j = 0; j < reports_list->num_of_reports; j++) {
+				recv_msg = (struct hostif_msg *)(reports +
+					sizeof(uint16_t));
+				report_len = *(uint16_t *)reports;
+				payload = reports + sizeof(uint16_t) +
+					sizeof(struct hostif_msg_hdr);
+				payload_len = report_len -
+					sizeof(struct hostif_msg_hdr);
+
+				for (i = 0; i < client_data->num_hid_devices;
+				     ++i)
+					if (recv_msg->hdr.device_id ==
+					client_data->hid_devices[i].dev_id &&
+					client_data->hid_sensor_hubs[i]) {
+						hid_input_report(
+						client_data->hid_sensor_hubs[
+									i],
+						report_type,
+						payload, payload_len,
+						0);
+					}
+
+				reports += sizeof(uint16_t) + report_len;
+			}
+			break;
+		default:
+			++client_data->bad_recv_cnt;
+			report_bad_packet(hid_ishtp_cl, recv_msg, cur_pos,
+					  payload_len);
+			ish_hw_reset(hid_ishtp_cl->dev);
+			break;
+
+		}
+
+		if (!cur_pos && cur_pos + payload_len +
+				sizeof(struct hostif_msg) < total_len)
+			++client_data->multi_packet_cnt;
+
+		cur_pos += payload_len + sizeof(struct hostif_msg);
+		payload += payload_len + sizeof(struct hostif_msg);
+
+	} while (cur_pos < total_len);
+}
+
+/**
+ * ish_cl_event_cb() - bus driver callback for incoming message/packet
+ * @device:	Pointer to the the ishtp client device for which this message
+ *		is targeted
+ *
+ * Remove the packet from the list and process the message by calling
+ * process_recv
+ */
+static void ish_cl_event_cb(struct ishtp_cl_device *device)
+{
+	struct ishtp_cl	*hid_ishtp_cl = device->driver_data;
+	struct ishtp_cl_rb *rb_in_proc;
+	size_t r_length;
+	unsigned long flags;
+
+	if (!hid_ishtp_cl)
+		return;
+
+	spin_lock_irqsave(&hid_ishtp_cl->in_process_spinlock, flags);
+	while (!list_empty(&hid_ishtp_cl->in_process_list.list)) {
+		rb_in_proc = list_entry(
+			hid_ishtp_cl->in_process_list.list.next,
+			struct ishtp_cl_rb, list);
+		list_del_init(&rb_in_proc->list);
+		spin_unlock_irqrestore(&hid_ishtp_cl->in_process_spinlock,
+			flags);
+
+		if (!rb_in_proc->buffer.data)
+			return;
+
+		r_length = rb_in_proc->buf_idx;
+
+		/* decide what to do with received data */
+		process_recv(hid_ishtp_cl, rb_in_proc->buffer.data, r_length);
+
+		ishtp_cl_io_rb_recycle(rb_in_proc);
+		spin_lock_irqsave(&hid_ishtp_cl->in_process_spinlock, flags);
+	}
+	spin_unlock_irqrestore(&hid_ishtp_cl->in_process_spinlock, flags);
+}
+
+/**
+ * hid_ishtp_set_feature() - send request to ISH FW to set a feature request
+ * @hid:	hid device instance for this request
+ * @buf:	feature buffer
+ * @len:	Length of feature buffer
+ * @report_id:	Report id for the feature set request
+ *
+ * This is called from hid core .request() callback. This function doesn't wait
+ * for response.
+ */
+void hid_ishtp_set_feature(struct hid_device *hid, char *buf, unsigned int len,
+			   int report_id)
+{
+	struct ishtp_hid_data *hid_data =  hid->driver_data;
+	struct ishtp_cl_data *client_data = hid_data->client_data;
+	struct hostif_msg *msg = (struct hostif_msg *)buf;
+	int	rv;
+	int	i;
+
+	hid_ishtp_trace(client_data,  "%s hid %p\n", __func__, hid);
+
+	rv = ishtp_hid_link_ready_wait(client_data);
+	if (rv) {
+		hid_ishtp_trace(client_data,  "%s hid %p link not ready\n",
+				__func__, hid);
+		return;
+	}
+
+	memset(msg, 0, sizeof(struct hostif_msg));
+	msg->hdr.command = HOSTIF_SET_FEATURE_REPORT;
+	for (i = 0; i < client_data->num_hid_devices; ++i) {
+		if (hid == client_data->hid_sensor_hubs[i]) {
+			msg->hdr.device_id =
+				client_data->hid_devices[i].dev_id;
+			break;
+		}
+	}
+
+	if (i == client_data->num_hid_devices)
+		return;
+
+	rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len);
+	if (rv)
+		hid_ishtp_trace(client_data,  "%s hid %p send failed\n",
+				__func__, hid);
+}
+
+/**
+ * hid_ishtp_get_report() - request to get feature/input report
+ * @hid:	hid device instance for this request
+ * @report_id:	Report id for the get request
+ * @report_type:	Report type for the this request
+ *
+ * This is called from hid core .request() callback. This function will send
+ * request to FW and return without waiting for response.
+ */
+void hid_ishtp_get_report(struct hid_device *hid, int report_id,
+			  int report_type)
+{
+	struct ishtp_hid_data *hid_data =  hid->driver_data;
+	struct ishtp_cl_data *client_data = hid_data->client_data;
+	static unsigned char	buf[10];
+	unsigned int	len;
+	struct hostif_msg_to_sensor *msg = (struct hostif_msg_to_sensor *)buf;
+	int	rv;
+	int	i;
+
+	hid_ishtp_trace(client_data,  "%s hid %p\n", __func__, hid);
+	rv = ishtp_hid_link_ready_wait(client_data);
+	if (rv) {
+		hid_ishtp_trace(client_data,  "%s hid %p link not ready\n",
+				__func__, hid);
+		return;
+	}
+
+	len = sizeof(struct hostif_msg_to_sensor);
+
+	memset(msg, 0, sizeof(struct hostif_msg_to_sensor));
+	msg->hdr.command = (report_type == HID_FEATURE_REPORT) ?
+		HOSTIF_GET_FEATURE_REPORT : HOSTIF_GET_INPUT_REPORT;
+	for (i = 0; i < client_data->num_hid_devices; ++i) {
+		if (hid == client_data->hid_sensor_hubs[i]) {
+			msg->hdr.device_id =
+				client_data->hid_devices[i].dev_id;
+			break;
+		}
+	}
+
+	if (i == client_data->num_hid_devices)
+		return;
+
+	msg->report_id = report_id;
+	rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len);
+	if (rv)
+		hid_ishtp_trace(client_data,  "%s hid %p send failed\n",
+				__func__, hid);
+}
+
+/**
+ * ishtp_hid_link_ready_wait() - Wait for link ready
+ * @client_data:	client data instance
+ *
+ * If the transport link started suspend process, then wait, till either
+ * resumed or timeout
+ *
+ * Return: 0 on success, non zero on error
+ */
+int ishtp_hid_link_ready_wait(struct ishtp_cl_data *client_data)
+{
+	int rc;
+
+	if (client_data->suspended) {
+		hid_ishtp_trace(client_data,  "wait for link ready\n");
+		rc = wait_event_interruptible_timeout(
+					client_data->ishtp_resume_wait,
+					!client_data->suspended,
+					5 * HZ);
+
+		if (rc == 0) {
+			hid_ishtp_trace(client_data,  "link not ready\n");
+			return -EIO;
+		}
+		hid_ishtp_trace(client_data,  "link ready\n");
+	}
+
+	return 0;
+}
+
+/**
+ * ishtp_enum_enum_devices() - Enumerate hid devices
+ * @hid_ishtp_cl:	client instance
+ *
+ * Helper function to send request to firmware to enumerate HID devices
+ *
+ * Return: 0 on success, non zero on error
+ */
+static int ishtp_enum_enum_devices(struct ishtp_cl *hid_ishtp_cl)
+{
+	struct hostif_msg msg;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+	int retry_count;
+	int rv;
+
+	/* Send HOSTIF_DM_ENUM_DEVICES */
+	memset(&msg, 0, sizeof(struct hostif_msg));
+	msg.hdr.command = HOSTIF_DM_ENUM_DEVICES;
+	rv = ishtp_cl_send(hid_ishtp_cl, (unsigned char *)&msg,
+			   sizeof(struct hostif_msg));
+	if (rv)
+		return rv;
+
+	retry_count = 0;
+	while (!client_data->enum_devices_done &&
+	       retry_count < 10) {
+		wait_event_interruptible_timeout(client_data->init_wait,
+					 client_data->enum_devices_done,
+					 3 * HZ);
+		++retry_count;
+		if (!client_data->enum_devices_done)
+			/* Send HOSTIF_DM_ENUM_DEVICES */
+			rv = ishtp_cl_send(hid_ishtp_cl,
+					   (unsigned char *) &msg,
+					   sizeof(struct hostif_msg));
+	}
+	if (!client_data->enum_devices_done) {
+		dev_err(&client_data->cl_device->dev,
+			"[hid-ish]: timed out waiting for enum_devices\n");
+		return -ETIMEDOUT;
+	}
+	if (!client_data->hid_devices) {
+		dev_err(&client_data->cl_device->dev,
+			"[hid-ish]: failed to allocate HID dev structures\n");
+		return -ENOMEM;
+	}
+
+	client_data->num_hid_devices = client_data->hid_dev_count;
+	dev_info(&hid_ishtp_cl->device->dev,
+		"[hid-ish]: enum_devices_done OK, num_hid_devices=%d\n",
+		client_data->num_hid_devices);
+
+	return	0;
+}
+
+/**
+ * ishtp_get_hid_descriptor() - Get hid descriptor
+ * @hid_ishtp_cl:	client instance
+ * @index:		Index into the hid_descr array
+ *
+ * Helper function to send request to firmware get HID descriptor of a device
+ *
+ * Return: 0 on success, non zero on error
+ */
+static int ishtp_get_hid_descriptor(struct ishtp_cl *hid_ishtp_cl, int index)
+{
+	struct hostif_msg msg;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+	int rv;
+
+	/* Get HID descriptor */
+	client_data->hid_descr_done = false;
+	memset(&msg, 0, sizeof(struct hostif_msg));
+	msg.hdr.command = HOSTIF_GET_HID_DESCRIPTOR;
+	msg.hdr.device_id = client_data->hid_devices[index].dev_id;
+	rv = ishtp_cl_send(hid_ishtp_cl, (unsigned char *) &msg,
+			   sizeof(struct hostif_msg));
+	if (rv)
+		return rv;
+
+	if (!client_data->hid_descr_done) {
+		wait_event_interruptible_timeout(client_data->init_wait,
+						 client_data->hid_descr_done,
+						 3 * HZ);
+		if (!client_data->hid_descr_done) {
+			dev_err(&client_data->cl_device->dev,
+				"[hid-ish]: timed out for hid_descr_done\n");
+			return -EIO;
+		}
+
+		if (!client_data->hid_descr[index]) {
+			dev_err(&client_data->cl_device->dev,
+				"[hid-ish]: allocation HID desc fail\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ishtp_get_report_descriptor() - Get report descriptor
+ * @hid_ishtp_cl:	client instance
+ * @index:		Index into the hid_descr array
+ *
+ * Helper function to send request to firmware get HID report descriptor of
+ * a device
+ *
+ * Return: 0 on success, non zero on error
+ */
+static int ishtp_get_report_descriptor(struct ishtp_cl *hid_ishtp_cl,
+				       int index)
+{
+	struct hostif_msg msg;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+	int rv;
+
+	/* Get report descriptor */
+	client_data->report_descr_done = false;
+	memset(&msg, 0, sizeof(struct hostif_msg));
+	msg.hdr.command = HOSTIF_GET_REPORT_DESCRIPTOR;
+	msg.hdr.device_id = client_data->hid_devices[index].dev_id;
+	rv = ishtp_cl_send(hid_ishtp_cl, (unsigned char *) &msg,
+			   sizeof(struct hostif_msg));
+	if (rv)
+		return rv;
+
+	if (!client_data->report_descr_done)
+		wait_event_interruptible_timeout(client_data->init_wait,
+					 client_data->report_descr_done,
+					 3 * HZ);
+	if (!client_data->report_descr_done) {
+		dev_err(&client_data->cl_device->dev,
+				"[hid-ish]: timed out for report descr\n");
+		return -EIO;
+	}
+	if (!client_data->report_descr[index]) {
+		dev_err(&client_data->cl_device->dev,
+			"[hid-ish]: failed to alloc report descr\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * hid_ishtp_cl_init() - Init function for ISHTP client
+ * @hid_ishtp_cl:	ISHTP client instance
+ * @reset:		true if called for init after reset
+ *
+ * This function complete the initializtion of the client. The summary of
+ * processing:
+ * - Send request to enumerate the hid clients
+ *	Get the HID descriptor for each enumearated device
+ *	Get report description of each device
+ *	Register each device wik hid core by calling ishtp_hid_probe
+ *
+ * Return: 0 on success, non zero on error
+ */
+static int hid_ishtp_cl_init(struct ishtp_cl *hid_ishtp_cl, int reset)
+{
+	struct ishtp_device *dev;
+	unsigned long flags;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+	int i;
+	int rv;
+
+	dev_dbg(&client_data->cl_device->dev, "%s\n", __func__);
+	hid_ishtp_trace(client_data,  "%s reset flag: %d\n", __func__, reset);
+
+	rv = ishtp_cl_link(hid_ishtp_cl, ISHTP_HOST_CLIENT_ID_ANY);
+	if (rv) {
+		dev_err(&client_data->cl_device->dev,
+			"ishtp_cl_link failed\n");
+		return	-ENOMEM;
+	}
+
+	client_data->init_done = 0;
+
+	dev = hid_ishtp_cl->dev;
+
+	/* Connect to FW client */
+	hid_ishtp_cl->rx_ring_size = HID_CL_RX_RING_SIZE;
+	hid_ishtp_cl->tx_ring_size = HID_CL_TX_RING_SIZE;
+
+	spin_lock_irqsave(&dev->fw_clients_lock, flags);
+	i = ishtp_fw_cl_by_uuid(dev, &hid_ishtp_guid);
+	if (i < 0) {
+		spin_unlock_irqrestore(&dev->fw_clients_lock, flags);
+		dev_err(&client_data->cl_device->dev,
+			"ish client uuid not found\n");
+		return i;
+	}
+	hid_ishtp_cl->fw_client_id = dev->fw_clients[i].client_id;
+	spin_unlock_irqrestore(&dev->fw_clients_lock, flags);
+	hid_ishtp_cl->state = ISHTP_CL_CONNECTING;
+
+	rv = ishtp_cl_connect(hid_ishtp_cl);
+	if (rv) {
+		dev_err(&client_data->cl_device->dev,
+			"client connect fail\n");
+		goto err_cl_unlink;
+	}
+
+	hid_ishtp_trace(client_data,  "%s client connected\n", __func__);
+
+	/* Register read callback */
+	ishtp_register_event_cb(hid_ishtp_cl->device, ish_cl_event_cb);
+
+	rv = ishtp_enum_enum_devices(hid_ishtp_cl);
+	if (rv)
+		goto err_cl_disconnect;
+
+	hid_ishtp_trace(client_data,  "%s enumerated device count %d\n",
+			__func__, client_data->num_hid_devices);
+
+	for (i = 0; i < client_data->num_hid_devices; ++i) {
+		client_data->cur_hid_dev = i;
+
+		rv = ishtp_get_hid_descriptor(hid_ishtp_cl, i);
+		if (rv)
+			goto err_cl_disconnect;
+
+		rv = ishtp_get_report_descriptor(hid_ishtp_cl, i);
+		if (rv)
+			goto err_cl_disconnect;
+
+		if (!reset) {
+			rv = ishtp_hid_probe(i, client_data);
+			if (rv) {
+				dev_err(&client_data->cl_device->dev,
+				"[hid-ish]: HID probe for #%u failed: %d\n",
+				i, rv);
+				goto err_cl_disconnect;
+			}
+		}
+	} /* for() on all hid devices */
+
+	client_data->init_done = 1;
+	client_data->suspended = false;
+	wake_up_interruptible(&client_data->ishtp_resume_wait);
+	hid_ishtp_trace(client_data,  "%s successful init\n", __func__);
+	return 0;
+
+err_cl_disconnect:
+	hid_ishtp_cl->state = ISHTP_CL_DISCONNECTING;
+	ishtp_cl_disconnect(hid_ishtp_cl);
+err_cl_unlink:
+	ishtp_cl_unlink(hid_ishtp_cl);
+	return rv;
+}
+
+/**
+ * hid_ishtp_cl_deinit() - Deinit function for ISHTP client
+ * @hid_ishtp_cl:	ISHTP client instance
+ *
+ * Unlink and free hid client
+ */
+static void hid_ishtp_cl_deinit(struct ishtp_cl *hid_ishtp_cl)
+{
+	ishtp_cl_unlink(hid_ishtp_cl);
+	ishtp_cl_flush_queues(hid_ishtp_cl);
+
+	/* disband and free all Tx and Rx client-level rings */
+	ishtp_cl_free(hid_ishtp_cl);
+}
+
+static void hid_ishtp_cl_reset_handler(struct work_struct *work)
+{
+	struct ishtp_cl_data *client_data;
+	struct ishtp_cl *hid_ishtp_cl;
+	struct ishtp_cl_device *cl_device;
+	int retry;
+	int rv;
+
+	client_data = container_of(work, struct ishtp_cl_data, work);
+
+	hid_ishtp_cl = client_data->hid_ishtp_cl;
+	cl_device = client_data->cl_device;
+
+	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
+			hid_ishtp_cl);
+	dev_dbg(&cl_device->dev, "%s\n", __func__);
+
+	hid_ishtp_cl_deinit(hid_ishtp_cl);
+
+	hid_ishtp_cl = ishtp_cl_allocate(cl_device->ishtp_dev);
+	if (!hid_ishtp_cl)
+		return;
+
+	cl_device->driver_data = hid_ishtp_cl;
+	hid_ishtp_cl->client_data = client_data;
+	client_data->hid_ishtp_cl = hid_ishtp_cl;
+
+	client_data->num_hid_devices = 0;
+
+	for (retry = 0; retry < 3; ++retry) {
+		rv = hid_ishtp_cl_init(hid_ishtp_cl, 1);
+		if (!rv)
+			break;
+		dev_err(&client_data->cl_device->dev, "Retry reset init\n");
+	}
+	if (rv) {
+		dev_err(&client_data->cl_device->dev, "Reset Failed\n");
+		hid_ishtp_trace(client_data, "%s Failed hid_ishtp_cl %p\n",
+				__func__, hid_ishtp_cl);
+	}
+}
+
+/**
+ * hid_ishtp_cl_probe() - ISHTP client driver probe
+ * @cl_device:		ISHTP client device instance
+ *
+ * This function gets called on device create on ISHTP bus
+ *
+ * Return: 0 on success, non zero on error
+ */
+static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *hid_ishtp_cl;
+	struct ishtp_cl_data *client_data;
+	int rv;
+
+	if (!cl_device)
+		return	-ENODEV;
+
+	if (uuid_le_cmp(hid_ishtp_guid,
+			cl_device->fw_client->props.protocol_name) != 0)
+		return	-ENODEV;
+
+	client_data = devm_kzalloc(&cl_device->dev, sizeof(*client_data),
+				   GFP_KERNEL);
+	if (!client_data)
+		return -ENOMEM;
+
+	hid_ishtp_cl = ishtp_cl_allocate(cl_device->ishtp_dev);
+	if (!hid_ishtp_cl)
+		return -ENOMEM;
+
+	cl_device->driver_data = hid_ishtp_cl;
+	hid_ishtp_cl->client_data = client_data;
+	client_data->hid_ishtp_cl = hid_ishtp_cl;
+	client_data->cl_device = cl_device;
+
+	init_waitqueue_head(&client_data->init_wait);
+	init_waitqueue_head(&client_data->ishtp_resume_wait);
+
+	INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
+
+	rv = hid_ishtp_cl_init(hid_ishtp_cl, 0);
+	if (rv) {
+		ishtp_cl_free(hid_ishtp_cl);
+		return rv;
+	}
+	ishtp_get_device(cl_device);
+
+	return 0;
+}
+
+/**
+ * hid_ishtp_cl_remove() - ISHTP client driver remove
+ * @cl_device:		ISHTP client device instance
+ *
+ * This function gets called on device remove on ISHTP bus
+ *
+ * Return: 0
+ */
+static int hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *hid_ishtp_cl = cl_device->driver_data;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+
+	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
+			hid_ishtp_cl);
+
+	dev_dbg(&cl_device->dev, "%s\n", __func__);
+	hid_ishtp_cl->state = ISHTP_CL_DISCONNECTING;
+	ishtp_cl_disconnect(hid_ishtp_cl);
+	ishtp_put_device(cl_device);
+	ishtp_hid_remove(client_data);
+	hid_ishtp_cl_deinit(hid_ishtp_cl);
+
+	hid_ishtp_cl = NULL;
+
+	client_data->num_hid_devices = 0;
+
+	return 0;
+}
+
+/**
+ * hid_ishtp_cl_reset() - ISHTP client driver reset
+ * @cl_device:		ISHTP client device instance
+ *
+ * This function gets called on device reset on ISHTP bus
+ *
+ * Return: 0
+ */
+static int hid_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
+{
+	struct ishtp_cl *hid_ishtp_cl = cl_device->driver_data;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+
+	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
+			hid_ishtp_cl);
+
+	schedule_work(&client_data->work);
+
+	return 0;
+}
+
+#define to_ishtp_cl_device(d) container_of(d, struct ishtp_cl_device, dev)
+
+/**
+ * hid_ishtp_cl_suspend() - ISHTP client driver suspend
+ * @device:	device instance
+ *
+ * This function gets called on system suspend
+ *
+ * Return: 0
+ */
+static int hid_ishtp_cl_suspend(struct device *device)
+{
+	struct ishtp_cl_device *cl_device = to_ishtp_cl_device(device);
+	struct ishtp_cl *hid_ishtp_cl = cl_device->driver_data;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+
+	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
+			hid_ishtp_cl);
+	client_data->suspended = true;
+
+	return 0;
+}
+
+/**
+ * hid_ishtp_cl_resume() - ISHTP client driver resume
+ * @device:	device instance
+ *
+ * This function gets called on system resume
+ *
+ * Return: 0
+ */
+static int hid_ishtp_cl_resume(struct device *device)
+{
+	struct ishtp_cl_device *cl_device = to_ishtp_cl_device(device);
+	struct ishtp_cl *hid_ishtp_cl = cl_device->driver_data;
+	struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data;
+
+	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
+			hid_ishtp_cl);
+	client_data->suspended = false;
+	return 0;
+}
+
+static const struct dev_pm_ops hid_ishtp_pm_ops = {
+	.suspend = hid_ishtp_cl_suspend,
+	.resume = hid_ishtp_cl_resume,
+};
+
+static struct ishtp_cl_driver	hid_ishtp_cl_driver = {
+	.name = "ish-hid",
+	.probe = hid_ishtp_cl_probe,
+	.remove = hid_ishtp_cl_remove,
+	.reset = hid_ishtp_cl_reset,
+	.driver.pm = &hid_ishtp_pm_ops,
+};
+
+static int __init ish_hid_init(void)
+{
+	int	rv;
+
+	/* Register ISHTP client device driver with ISHTP Bus */
+	rv = ishtp_cl_driver_register(&hid_ishtp_cl_driver);
+
+	return rv;
+
+}
+
+static void __exit ish_hid_exit(void)
+{
+	ishtp_cl_driver_unregister(&hid_ishtp_cl_driver);
+}
+
+late_initcall(ish_hid_init);
+module_exit(ish_hid_exit);
+
+MODULE_DESCRIPTION("ISH ISHTP HID client driver");
+/* Primary author */
+MODULE_AUTHOR("Daniel Drubin <daniel.drubin@intel.com>");
+/*
+ * Several modification for multi instance support
+ * suspend/resume and clean up
+ */
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ishtp:*");
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c
new file mode 100644
index 000000000000..277983aa1d90
--- /dev/null
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.c
@@ -0,0 +1,246 @@
+/*
+ * ISHTP-HID glue driver.
+ *
+ * Copyright (c) 2012-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/hid.h>
+#include <uapi/linux/input.h>
+#include "ishtp/client.h"
+#include "ishtp-hid.h"
+
+/**
+ * ishtp_hid_parse() - hid-core .parse() callback
+ * @hid:	hid device instance
+ *
+ * This function gets called during call to hid_add_device
+ *
+ * Return: 0 on success and non zero on error
+ */
+static int ishtp_hid_parse(struct hid_device *hid)
+{
+	struct ishtp_hid_data *hid_data =  hid->driver_data;
+	struct ishtp_cl_data *client_data = hid_data->client_data;
+	int rv;
+
+	rv = hid_parse_report(hid, client_data->report_descr[hid_data->index],
+			      client_data->report_descr_size[hid_data->index]);
+	if (rv)
+		return	rv;
+
+	return 0;
+}
+
+/* Empty callbacks with success return code */
+static int ishtp_hid_start(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void ishtp_hid_stop(struct hid_device *hid)
+{
+}
+
+static int ishtp_hid_open(struct hid_device *hid)
+{
+	return 0;
+}
+
+static void ishtp_hid_close(struct hid_device *hid)
+{
+}
+
+static int ishtp_raw_request(struct hid_device *hdev, unsigned char reportnum,
+	__u8 *buf, size_t len, unsigned char rtype, int reqtype)
+{
+	return 0;
+}
+
+/**
+ * ishtp_hid_request() - hid-core .request() callback
+ * @hid:	hid device instance
+ * @rep:	pointer to hid_report
+ * @reqtype:	type of req. [GET|SET]_REPORT
+ *
+ * This function is used to set/get feaure/input report.
+ */
+static void ishtp_hid_request(struct hid_device *hid, struct hid_report *rep,
+	int reqtype)
+{
+	struct ishtp_hid_data *hid_data =  hid->driver_data;
+	/* the specific report length, just HID part of it */
+	unsigned int len = ((rep->size - 1) >> 3) + 1 + (rep->id > 0);
+	char *buf;
+	unsigned int header_size = sizeof(struct hostif_msg);
+
+	len += header_size;
+
+	hid_data->request_done = false;
+	switch (reqtype) {
+	case HID_REQ_GET_REPORT:
+		hid_ishtp_get_report(hid, rep->id, rep->type);
+		break;
+	case HID_REQ_SET_REPORT:
+		/*
+		 * Spare 7 bytes for 64b accesses through
+		 * get/put_unaligned_le64()
+		 */
+		buf = kzalloc(len + 7, GFP_KERNEL);
+		if (!buf)
+			return;
+
+		hid_output_report(rep, buf + header_size);
+		hid_ishtp_set_feature(hid, buf, len, rep->id);
+		kfree(buf);
+		break;
+	}
+}
+
+/**
+ * ishtp_wait_for_response() - hid-core .wait() callback
+ * @hid:	hid device instance
+ *
+ * This function is used to wait after get feaure/input report.
+ *
+ * Return: 0 on success and non zero on error
+ */
+static int ishtp_wait_for_response(struct hid_device *hid)
+{
+	struct ishtp_hid_data *hid_data =  hid->driver_data;
+	struct ishtp_cl_data *client_data = hid_data->client_data;
+	int rv;
+
+	hid_ishtp_trace(client_data,  "%s hid %p\n", __func__, hid);
+
+	rv = ishtp_hid_link_ready_wait(hid_data->client_data);
+	if (rv)
+		return rv;
+
+	if (!hid_data->request_done)
+		wait_event_interruptible_timeout(hid_data->hid_wait,
+					hid_data->request_done, 3 * HZ);
+
+	if (!hid_data->request_done) {
+		hid_err(hid,
+			"timeout waiting for response from ISHTP device\n");
+		return -ETIMEDOUT;
+	}
+	hid_ishtp_trace(client_data,  "%s hid %p done\n", __func__, hid);
+
+	hid_data->request_done = false;
+
+	return 0;
+}
+
+/**
+ * ishtp_hid_wakeup() - Wakeup caller
+ * @hid:	hid device instance
+ *
+ * This function will wakeup caller waiting for Get/Set feature report
+ */
+void ishtp_hid_wakeup(struct hid_device *hid)
+{
+	struct ishtp_hid_data *hid_data = hid->driver_data;
+
+	hid_data->request_done = true;
+	wake_up_interruptible(&hid_data->hid_wait);
+}
+
+static struct hid_ll_driver ishtp_hid_ll_driver = {
+	.parse = ishtp_hid_parse,
+	.start = ishtp_hid_start,
+	.stop = ishtp_hid_stop,
+	.open = ishtp_hid_open,
+	.close = ishtp_hid_close,
+	.request = ishtp_hid_request,
+	.wait = ishtp_wait_for_response,
+	.raw_request = ishtp_raw_request
+};
+
+/**
+ * ishtp_hid_probe() - hid register ll driver
+ * @cur_hid_dev:	Index of hid device calling to register
+ * @client_data:	Client data pointer
+ *
+ * This function is used to allocate and add HID device.
+ *
+ * Return: 0 on success, non zero on error
+ */
+int ishtp_hid_probe(unsigned int cur_hid_dev,
+		    struct ishtp_cl_data *client_data)
+{
+	int rv;
+	struct hid_device *hid;
+	struct ishtp_hid_data *hid_data;
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		rv = PTR_ERR(hid);
+		return	-ENOMEM;
+	}
+
+	hid_data = kzalloc(sizeof(*hid_data), GFP_KERNEL);
+	if (!hid_data) {
+		rv = -ENOMEM;
+		goto err_hid_data;
+	}
+
+	hid_data->index = cur_hid_dev;
+	hid_data->client_data = client_data;
+	init_waitqueue_head(&hid_data->hid_wait);
+
+	hid->driver_data = hid_data;
+
+	client_data->hid_sensor_hubs[cur_hid_dev] = hid;
+
+	hid->ll_driver = &ishtp_hid_ll_driver;
+	hid->bus = BUS_INTEL_ISHTP;
+	hid->dev.parent = &client_data->cl_device->dev;
+	hid->version = le16_to_cpu(ISH_HID_VERSION);
+	hid->vendor = le16_to_cpu(ISH_HID_VENDOR);
+	hid->product = le16_to_cpu(ISH_HID_PRODUCT);
+	snprintf(hid->name, sizeof(hid->name), "%s %04hX:%04hX", "hid-ishtp",
+		hid->vendor, hid->product);
+
+	rv = hid_add_device(hid);
+	if (rv)
+		goto err_hid_device;
+
+	hid_ishtp_trace(client_data,  "%s allocated hid %p\n", __func__, hid);
+
+	return 0;
+
+err_hid_device:
+	kfree(hid_data);
+err_hid_data:
+	kfree(hid);
+	return rv;
+}
+
+/**
+ * ishtp_hid_probe() - Remove registered hid device
+ * @client_data:	client data pointer
+ *
+ * This function is used to destroy allocatd HID device.
+ */
+void ishtp_hid_remove(struct ishtp_cl_data *client_data)
+{
+	int i;
+
+	for (i = 0; i < client_data->num_hid_devices; ++i) {
+		if (client_data->hid_sensor_hubs[i]) {
+			kfree(client_data->hid_sensor_hubs[i]->driver_data);
+			hid_destroy_device(client_data->hid_sensor_hubs[i]);
+			client_data->hid_sensor_hubs[i] = NULL;
+		}
+	}
+}
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h
new file mode 100644
index 000000000000..f5c7eb79b7b5
--- /dev/null
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.h
@@ -0,0 +1,182 @@
+/*
+ * ISHTP-HID glue driver's definitions.
+ *
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#ifndef ISHTP_HID__H
+#define	ISHTP_HID__H
+
+/* The fixed ISH product and vendor id */
+#define	ISH_HID_VENDOR	0x8086
+#define	ISH_HID_PRODUCT	0x22D8
+#define	ISH_HID_VERSION	0x0200
+
+#define	CMD_MASK	0x7F
+#define	IS_RESPONSE	0x80
+
+/* Used to dump to Linux trace buffer, if enabled */
+#define hid_ishtp_trace(client, ...)	\
+	client->cl_device->ishtp_dev->print_log(\
+		client->cl_device->ishtp_dev, __VA_ARGS__)
+
+/* ISH Transport protocol (ISHTP in short) GUID */
+static const uuid_le hid_ishtp_guid = UUID_LE(0x33AECD58, 0xB679, 0x4E54,
+					      0x9B, 0xD9, 0xA0, 0x4D, 0x34,
+					      0xF0, 0xC2, 0x26);
+
+/* ISH HID message structure */
+struct hostif_msg_hdr {
+	uint8_t	command; /* Bit 7: is_response */
+	uint8_t	device_id;
+	uint8_t	status;
+	uint8_t	flags;
+	uint16_t size;
+} __packed;
+
+struct hostif_msg {
+	struct hostif_msg_hdr	hdr;
+} __packed;
+
+struct hostif_msg_to_sensor {
+	struct hostif_msg_hdr	hdr;
+	uint8_t	report_id;
+} __packed;
+
+struct device_info {
+	uint32_t dev_id;
+	uint8_t dev_class;
+	uint16_t pid;
+	uint16_t vid;
+} __packed;
+
+struct ishtp_version {
+	uint8_t	major;
+	uint8_t	minor;
+	uint8_t	hotfix;
+	uint16_t build;
+} __packed;
+
+/* struct for ISHTP aggregated input data */
+struct report_list {
+	uint16_t total_size;
+	uint8_t	num_of_reports;
+	uint8_t	flags;
+	struct {
+		uint16_t	size_of_report;
+		uint8_t report[1];
+	} __packed reports[1];
+} __packed;
+
+/* HOSTIF commands */
+#define	HOSTIF_HID_COMMAND_BASE			0
+#define	HOSTIF_GET_HID_DESCRIPTOR		0
+#define	HOSTIF_GET_REPORT_DESCRIPTOR		1
+#define HOSTIF_GET_FEATURE_REPORT		2
+#define	HOSTIF_SET_FEATURE_REPORT		3
+#define	HOSTIF_GET_INPUT_REPORT			4
+#define	HOSTIF_PUBLISH_INPUT_REPORT		5
+#define	HOSTIF_PUBLISH_INPUT_REPORT_LIST	6
+#define	HOSTIF_DM_COMMAND_BASE			32
+#define	HOSTIF_DM_ENUM_DEVICES			33
+#define	HOSTIF_DM_ADD_DEVICE			34
+
+#define	MAX_HID_DEVICES				32
+
+/**
+ * struct ishtp_cl_data - Encapsulate per ISH TP HID Client
+ * @enum_device_done:	Enum devices response complete flag
+ * @hid_descr_done:	HID descriptor complete flag
+ * @report_descr_done:	Get report descriptor complete flag
+ * @init_done:		Init process completed successfully
+ * @suspended:		System is under suspend state or in progress
+ * @num_hid_devices:	Number of HID devices enumerated in this client
+ * @cur_hid_dev:	This keeps track of the device index for which
+ *			initialization and registration with HID core
+ *			in progress.
+ * @hid_devices:	Store vid/pid/devid for each enumerated HID device
+ * @report_descr:	Stores the raw report descriptors for each HID device
+ * @report_descr_size:	Report description of size of above repo_descr[]
+ * @hid_sensor_hubs:	Pointer to hid_device for all HID device, so that
+ *			when clients are removed, they can be freed
+ * @hid_descr:		Pointer to hid descriptor for each enumerated hid
+ *			device
+ * @hid_descr_size:	Size of each above report descriptor
+ * @init_wait:		Wait queue to wait during initialization, where the
+ *			client send message to ISH FW and wait for response
+ * @ishtp_hid_wait:	The wait for get report during wait callback from hid
+ *			core
+ * @bad_recv_cnt:	Running count of packets received with error
+ * @multi_packet_cnt:	Count of fragmented packet count
+ *
+ * This structure is used to store completion flags and per client data like
+ * like report description, number of HID devices etc.
+ */
+struct ishtp_cl_data {
+	/* completion flags */
+	bool enum_devices_done;
+	bool hid_descr_done;
+	bool report_descr_done;
+	bool init_done;
+	bool suspended;
+
+	unsigned int num_hid_devices;
+	unsigned int cur_hid_dev;
+	unsigned int hid_dev_count;
+
+	struct device_info *hid_devices;
+	unsigned char *report_descr[MAX_HID_DEVICES];
+	int report_descr_size[MAX_HID_DEVICES];
+	struct hid_device *hid_sensor_hubs[MAX_HID_DEVICES];
+	unsigned char *hid_descr[MAX_HID_DEVICES];
+	int hid_descr_size[MAX_HID_DEVICES];
+
+	wait_queue_head_t init_wait;
+	wait_queue_head_t ishtp_resume_wait;
+	struct ishtp_cl *hid_ishtp_cl;
+
+	/* Statistics */
+	unsigned int bad_recv_cnt;
+	int multi_packet_cnt;
+
+	struct work_struct work;
+	struct ishtp_cl_device *cl_device;
+};
+
+/**
+ * struct ishtp_hid_data - Per instance HID data
+ * @index:		Device index in the order of enumeration
+ * @request_done:	Get Feature/Input report complete flag
+ *			used during get/set request from hid core
+ * @client_data:	Link to the client instance
+ * @hid_wait:		Completion waitq
+ *
+ * Used to tie hid hid->driver data to driver client instance
+ */
+struct ishtp_hid_data {
+	int index;
+	bool request_done;
+	struct ishtp_cl_data *client_data;
+	wait_queue_head_t hid_wait;
+};
+
+/* Interface functions between HID LL driver and ISH TP client */
+void hid_ishtp_set_feature(struct hid_device *hid, char *buf, unsigned int len,
+			   int report_id);
+void hid_ishtp_get_report(struct hid_device *hid, int report_id,
+			  int report_type);
+int ishtp_hid_probe(unsigned int cur_hid_dev,
+		    struct ishtp_cl_data *client_data);
+void ishtp_hid_remove(struct ishtp_cl_data *client_data);
+int ishtp_hid_link_ready_wait(struct ishtp_cl_data *client_data);
+void ishtp_hid_wakeup(struct hid_device *hid);
+
+#endif	/* ISHTP_HID__H */
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index c51494119817..e794f7bee22f 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -248,6 +248,7 @@ struct input_mask {
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
 #define BUS_CEC			0x1E
+#define BUS_INTEL_ISHTP		0x1F
 
 /*
  * MT_TOOL types
-- 
cgit v1.2.3


From eec097d43100a8195fd4f678671ecd5d986dd675 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 13 Jun 2016 11:01:51 -0500
Subject: PCI: Add pci_enable_ptm() for drivers to enable PTM on endpoints

Add an pci_enable_ptm() interface so drivers can enable PTM.

The PCI core enables PTM on PTM Roots and switches automatically, but we
don't enable PTM on endpoints unless a driver requests it.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/ptm.c        | 45 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h           |  7 +++++++
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 53 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index 48eea4e65247..a14ac94b96dc 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -68,3 +68,48 @@ void pci_ptm_init(struct pci_dev *dev)
 
 	pci_ptm_info(dev);
 }
+
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{
+	int pos;
+	u32 cap, ctrl;
+	struct pci_dev *ups;
+
+	if (!pci_is_pcie(dev))
+		return -EINVAL;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PTM);
+	if (!pos)
+		return -EINVAL;
+
+	pci_read_config_dword(dev, pos + PCI_PTM_CAP, &cap);
+	if (!(cap & PCI_PTM_CAP_REQ))
+		return -EINVAL;
+
+	/*
+	 * For a PCIe Endpoint, PTM is only useful if the endpoint can
+	 * issue PTM requests to upstream devices that have PTM enabled.
+	 *
+	 * For Root Complex Integrated Endpoints, there is no upstream
+	 * device, so there must be some implementation-specific way to
+	 * associate the endpoint with a time source.
+	 */
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT) {
+		ups = pci_upstream_bridge(dev);
+		if (!ups || !ups->ptm_enabled)
+			return -EINVAL;
+	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+	} else
+		return -EINVAL;
+
+	ctrl = PCI_PTM_CTRL_ENABLE;
+	pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl);
+	dev->ptm_enabled = 1;
+
+	pci_ptm_info(dev);
+
+	if (granularity)
+		*granularity = 0;
+	return 0;
+}
+EXPORT_SYMBOL(pci_enable_ptm);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 96c509fa9d46..9e4b6d6f3c8d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1407,6 +1407,13 @@ static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
 #endif
 
+#ifdef CONFIG_PCIE_PTM
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
+#else
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
+#endif
+
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 926fff41b417..72bbe1491cbf 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -967,6 +967,7 @@
 
 /* Precision Time Measurement */
 #define PCI_PTM_CAP			0x04	    /* PTM Capability */
+#define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
 #define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
 #define PCI_PTM_CTRL			0x08	    /* PTM Control */
 #define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
-- 
cgit v1.2.3


From 9399ae9a6cb28ebac78216f715ace3b42f1c2132 Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 17 Aug 2016 13:36:13 +0300
Subject: net_sched: flower: Add vlan support

Enhance flower to support 802.1Q vlan protocol classification.
Currently, the supported fields are vlan_id and vlan_priority.

Example:

	# add a flower filter with vlan id and priority classification
	tc filter add dev ens4f0 protocol 802.1Q parent ffff: \
		flower \
		indev ens4f0 \
		vlan_ethtype ipv4 \
		vlan_id 100 \
		vlan_prio 3 \
	action vlan pop

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  3 ++
 net/sched/cls_flower.c       | 70 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index d1c1ccaba787..51b5b247fb5a 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -428,6 +428,9 @@ enum {
 	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
 
 	TCA_FLOWER_FLAGS,
+	TCA_FLOWER_KEY_VLAN_ID,
+	TCA_FLOWER_KEY_VLAN_PRIO,
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE,
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0080fc073019..1e11e57e6947 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -28,6 +28,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_control control;
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
+	struct flow_dissector_key_vlan vlan;
 	struct flow_dissector_key_addrs ipaddrs;
 	union {
 		struct flow_dissector_key_ipv4_addrs ipv4;
@@ -293,6 +294,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
+
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -308,9 +313,29 @@ static void fl_set_key_val(struct nlattr **tb,
 		memcpy(mask, nla_data(tb[mask_type]), len);
 }
 
+static void fl_set_key_vlan(struct nlattr **tb,
+			    struct flow_dissector_key_vlan *key_val,
+			    struct flow_dissector_key_vlan *key_mask)
+{
+#define VLAN_PRIORITY_MASK	0x7
+
+	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+		key_val->vlan_id =
+			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+		key_mask->vlan_id = VLAN_VID_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+		key_val->vlan_priority =
+			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+			VLAN_PRIORITY_MASK;
+		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
+	}
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask)
 {
+	__be16 ethertype;
 #ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_FLOWER_INDEV]) {
 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -328,9 +353,19 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
 		       sizeof(key->eth.src));
 
-	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
-		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-		       sizeof(key->basic.n_proto));
+	if (tb[TCA_FLOWER_KEY_ETH_TYPE])
+		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
+
+	if (ethertype == htons(ETH_P_8021Q)) {
+		fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
+		fl_set_key_val(tb, &key->basic.n_proto,
+			       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+			       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+			       sizeof(key->basic.n_proto));
+	} else {
+		key->basic.n_proto = ethertype;
+		mask->basic.n_proto = cpu_to_be16(~0);
+	}
 
 	if (key->basic.n_proto == htons(ETH_P_IP) ||
 	    key->basic.n_proto == htons(ETH_P_IPV6)) {
@@ -438,6 +473,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_PORTS, tp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_VLAN, vlan);
 
 	skb_flow_dissector_init(&head->dissector, keys, cnt);
 }
@@ -666,6 +703,29 @@ static int fl_dump_key_val(struct sk_buff *skb,
 	return 0;
 }
 
+static int fl_dump_key_vlan(struct sk_buff *skb,
+			    struct flow_dissector_key_vlan *vlan_key,
+			    struct flow_dissector_key_vlan *vlan_mask)
+{
+	int err;
+
+	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
+		return 0;
+	if (vlan_mask->vlan_id) {
+		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+				  vlan_key->vlan_id);
+		if (err)
+			return err;
+	}
+	if (vlan_mask->vlan_priority) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+				 vlan_key->vlan_priority);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
@@ -710,6 +770,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
 			    sizeof(key->basic.n_proto)))
 		goto nla_put_failure;
+
+	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+		goto nla_put_failure;
+
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
 	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
-- 
cgit v1.2.3


From 956af37102b515512331a03c35c958b2a1d8dd87 Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 17 Aug 2016 13:36:14 +0300
Subject: net_sched: act_vlan: Add priority option

The current vlan push action supports only vid and protocol options.
Add priority option.

Example script that adds vlan push action with vid and
priority:

tc filter add dev veth0 protocol ip parent ffff: \
	   flower \
	   	indev veth0 \
	   action vlan push id 100 priority 5

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_vlan.h        |  1 +
 include/uapi/linux/tc_act/tc_vlan.h |  1 +
 net/sched/act_vlan.c                | 13 +++++++++++--
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index e29f52e8bdf1..6b835889ea30 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -20,6 +20,7 @@ struct tcf_vlan {
 	int			tcfv_action;
 	u16			tcfv_push_vid;
 	__be16			tcfv_push_proto;
+	u8			tcfv_push_prio;
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 31151ff6264f..be72b6e3843b 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -29,6 +29,7 @@ enum {
 	TCA_VLAN_PUSH_VLAN_ID,
 	TCA_VLAN_PUSH_VLAN_PROTOCOL,
 	TCA_VLAN_PAD,
+	TCA_VLAN_PUSH_VLAN_PRIORITY,
 	__TCA_VLAN_MAX,
 };
 #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 691409de3e1a..59a8d3150ae2 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 			goto drop;
 		break;
 	case TCA_VLAN_ACT_PUSH:
-		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid);
+		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
+				    (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
 		if (err)
 			goto drop;
 		break;
@@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 	[TCA_VLAN_PARMS]		= { .len = sizeof(struct tc_vlan) },
 	[TCA_VLAN_PUSH_VLAN_ID]		= { .type = NLA_U16 },
 	[TCA_VLAN_PUSH_VLAN_PROTOCOL]	= { .type = NLA_U16 },
+	[TCA_VLAN_PUSH_VLAN_PRIORITY]	= { .type = NLA_U8 },
 };
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
@@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	int action;
 	__be16 push_vid = 0;
 	__be16 push_proto = 0;
+	u8 push_prio = 0;
 	bool exists = false;
 	int ret = 0, err;
 
@@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		} else {
 			push_proto = htons(ETH_P_8021Q);
 		}
+
+		if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+			push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
 		break;
 	default:
 		if (exists)
@@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 
 	v->tcfv_action = action;
 	v->tcfv_push_vid = push_vid;
+	v->tcfv_push_prio = push_prio;
 	v->tcfv_push_proto = push_proto;
 
 	v->tcf_action = parm->action;
@@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
 	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
-			  v->tcfv_push_proto)))
+			  v->tcfv_push_proto) ||
+	     (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
+					      v->tcfv_push_prio))))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &v->tcf_tm);
-- 
cgit v1.2.3


From 61ba1a2da9693b88bf5f2bb8e7a99a29cd139122 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 17 Aug 2016 12:53:10 +0200
Subject: net: bridge: export vlan flags with the stats

Use one of the vlan xstats padding fields to export the vlan flags. This is
needed in order to be able to distinguish between master (bridge) and port
vlan entries in user-space when dumping the bridge vlan stats.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 2 +-
 net/bridge/br_netlink.c        | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c186f64fffca..ab92bca6d448 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -140,7 +140,7 @@ struct bridge_vlan_xstats {
 	__u64 tx_bytes;
 	__u64 tx_packets;
 	__u16 vid;
-	__u16 pad1;
+	__u16 flags;
 	__u32 pad2;
 };
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 493ab9b3d51a..872d4c0deb59 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1321,6 +1321,7 @@ static int br_fill_linkxstats(struct sk_buff *skb,
 				continue;
 			memset(&vxi, 0, sizeof(vxi));
 			vxi.vid = v->vid;
+			vxi.flags = v->flags;
 			br_vlan_get_stats(v, &stats);
 			vxi.rx_bytes = stats.rx_bytes;
 			vxi.rx_packets = stats.rx_packets;
-- 
cgit v1.2.3


From b34040227be7da760cc72ef3c807e0985e7f0f16 Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Thu, 18 Aug 2016 10:33:52 +0200
Subject: tipc: add peer removal functionality

Add TIPC_NL_PEER_REMOVE netlink command. This command can remove
an offline peer node from the internal data structures.

This will be supported by the tipc user space tool in iproute2.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |  1 +
 net/tipc/net.h                    |  2 ++
 net/tipc/netlink.c                |  5 ++++
 net/tipc/node.c                   | 63 +++++++++++++++++++++++++++++++++++++++
 net/tipc/node.h                   |  1 +
 5 files changed, 72 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 5f3f6d09fb79..bcb65ef725f6 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -59,6 +59,7 @@ enum {
 	TIPC_NL_MON_SET,
 	TIPC_NL_MON_GET,
 	TIPC_NL_MON_PEER_GET,
+	TIPC_NL_PEER_REMOVE,
 
 	__TIPC_NL_CMD_MAX,
 	TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 77a7a118911d..c7c254902873 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -39,6 +39,8 @@
 
 #include <net/genetlink.h>
 
+extern const struct nla_policy tipc_nl_net_policy[];
+
 int tipc_net_start(struct net *net, u32 addr);
 
 void tipc_net_stop(struct net *net);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index a84daec0afe9..2718de667828 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -238,6 +238,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 		.dumpit	= tipc_nl_node_dump_monitor_peer,
 		.policy = tipc_nl_policy,
 	},
+	{
+		.cmd	= TIPC_NL_PEER_REMOVE,
+		.doit	= tipc_nl_peer_rm,
+		.policy = tipc_nl_policy,
+	}
 };
 
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 21974191e425..7e8b75fd1a02 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1553,6 +1553,69 @@ discard:
 	kfree_skb(skb);
 }
 
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+	struct tipc_node *peer;
+	u32 addr;
+	int err;
+	int i;
+
+	/* We identify the peer by its net */
+	if (!info->attrs[TIPC_NLA_NET])
+		return -EINVAL;
+
+	err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
+			       info->attrs[TIPC_NLA_NET],
+			       tipc_nl_net_policy);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_NET_ADDR])
+		return -EINVAL;
+
+	addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+
+	if (in_own_node(net, addr))
+		return -ENOTSUPP;
+
+	spin_lock_bh(&tn->node_list_lock);
+	peer = tipc_node_find(net, addr);
+	if (!peer) {
+		spin_unlock_bh(&tn->node_list_lock);
+		return -ENXIO;
+	}
+
+	tipc_node_write_lock(peer);
+	if (peer->state != SELF_DOWN_PEER_DOWN &&
+	    peer->state != SELF_DOWN_PEER_LEAVING) {
+		tipc_node_write_unlock(peer);
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		struct tipc_link_entry *le = &peer->links[i];
+
+		if (le->link) {
+			kfree(le->link);
+			le->link = NULL;
+			peer->link_cnt--;
+		}
+	}
+	tipc_node_write_unlock(peer);
+	tipc_node_delete(peer);
+
+	err = 0;
+err_out:
+	tipc_node_put(peer);
+	spin_unlock_bh(&tn->node_list_lock);
+
+	return err;
+}
+
 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int err;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index d69fdfcc0ec9..4578b34c7dca 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -77,6 +77,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
-- 
cgit v1.2.3


From 5293efe62df81908f2e90c9820c7edcc8e61f5e9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 18 Aug 2016 01:00:39 +0200
Subject: bpf: add bpf_skb_change_tail helper

This work adds a bpf_skb_change_tail() helper for tc BPF programs. The
basic idea is to expand or shrink the skb in a controlled manner. The
eBPF program can then rewrite the rest via helpers like bpf_skb_store_bytes(),
bpf_lX_csum_replace() and others rather than passing a raw buffer for
writing here.

bpf_skb_change_tail() is really a slow path helper and intended for
replies with f.e. ICMP control messages. Concept is similar to other
helpers like bpf_skb_change_proto() helper to keep the helper without
protocol specifics and let the BPF program mangle the remaining parts.
A flags field has been added and is reserved for now should we extend
the helper in future.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h   |  43 +++++++++++++++++++-
 include/uapi/linux/bpf.h |  11 ++++++
 net/core/filter.c        | 100 +++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 150 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0f665cb26b50..7047448e8129 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2295,7 +2295,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 
 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
-static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
 {
 	if (unlikely(skb_is_nonlinear(skb))) {
 		WARN_ON(1);
@@ -2305,6 +2305,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 	skb_set_tail_pointer(skb, len);
 }
 
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	__skb_set_length(skb, len);
+}
+
 void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -2335,6 +2340,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
 	BUG_ON(err);
 }
 
+static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int diff = len - skb->len;
+
+	if (skb_tailroom(skb) < diff) {
+		int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
+					   GFP_ATOMIC);
+		if (ret)
+			return ret;
+	}
+	__skb_set_length(skb, len);
+	return 0;
+}
+
 /**
  *	skb_orphan - orphan a buffer
  *	@skb: buffer to orphan
@@ -2938,6 +2957,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 	return __pskb_trim(skb, len);
 }
 
+static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	__skb_trim(skb, len);
+	return 0;
+}
+
+static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	return __skb_grow(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -3726,6 +3760,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+static inline void skb_gso_reset(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_segs = 0;
+	skb_shinfo(skb)->gso_type = 0;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 866d53c33298..e4c5a1baa993 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -386,6 +386,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_current_task_under_cgroup,
 
+	/**
+	 * bpf_skb_change_tail(skb, len, flags)
+	 * The helper will resize the skb to the given new size,
+	 * to be used f.e. with control messages.
+	 * @skb: pointer to skb
+	 * @len: new skb length
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_tail,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 58b5e6dd25fe..abf546d96b6b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1350,14 +1350,18 @@ struct bpf_scratchpad {
 
 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
 
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+					  unsigned int write_len)
+{
+	return skb_ensure_writable(skb, write_len);
+}
+
 static inline int bpf_try_make_writable(struct sk_buff *skb,
 					unsigned int write_len)
 {
-	int err;
+	int err = __bpf_try_make_writable(skb, write_len);
 
-	err = skb_ensure_writable(skb, write_len);
 	bpf_compute_data_end(skb);
-
 	return err;
 }
 
@@ -1992,6 +1996,92 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+	u32 min_len = skb_network_offset(skb);
+
+	if (skb_transport_header_was_set(skb))
+		min_len = skb_transport_offset(skb);
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		min_len = skb_checksum_start_offset(skb) +
+			  skb->csum_offset + sizeof(__sum16);
+	return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+	       65536;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	unsigned int old_len = skb->len;
+	int ret;
+
+	ret = __skb_grow_rcsum(skb, new_len);
+	if (!ret)
+		memset(skb->data + old_len, 0, new_len - old_len);
+	return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	return __skb_trim_rcsum(skb, new_len);
+}
+
+static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *)(long) r1;
+	u32 max_len = __bpf_skb_max_len(skb);
+	u32 min_len = __bpf_skb_min_len(skb);
+	u32 new_len = (u32) r2;
+	int ret;
+
+	if (unlikely(flags || new_len > max_len || new_len < min_len))
+		return -EINVAL;
+	if (skb->encapsulation)
+		return -ENOTSUPP;
+
+	/* The basic idea of this helper is that it's performing the
+	 * needed work to either grow or trim an skb, and eBPF program
+	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
+	 * bpf_lX_csum_replace() and others rather than passing a raw
+	 * buffer here. This one is a slow path helper and intended
+	 * for replies with control messages.
+	 *
+	 * Like in bpf_skb_change_proto(), we want to keep this rather
+	 * minimal and without protocol specifics so that we are able
+	 * to separate concerns as in bpf_skb_store_bytes() should only
+	 * be the one responsible for writing buffers.
+	 *
+	 * It's really expected to be a slow path operation here for
+	 * control message replies, so we're implicitly linearizing,
+	 * uncloning and drop offloads from the skb by this.
+	 */
+	ret = __bpf_try_make_writable(skb, skb->len);
+	if (!ret) {
+		if (new_len > skb->len)
+			ret = bpf_skb_grow_rcsum(skb, new_len);
+		else if (new_len < skb->len)
+			ret = bpf_skb_trim_rcsum(skb, new_len);
+		if (!ret && skb_is_gso(skb))
+			skb_gso_reset(skb);
+	}
+
+	bpf_compute_data_end(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+	.func		= bpf_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
 	if (func == bpf_skb_vlan_push)
@@ -2002,6 +2092,8 @@ bool bpf_helper_changes_skb_data(void *func)
 		return true;
 	if (func == bpf_skb_change_proto)
 		return true;
+	if (func == bpf_skb_change_tail)
+		return true;
 	if (func == bpf_l3_csum_replace)
 		return true;
 	if (func == bpf_l4_csum_replace)
@@ -2368,6 +2460,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
 		return &bpf_skb_change_type_proto;
+	case BPF_FUNC_skb_change_tail:
+		return &bpf_skb_change_tail_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
-- 
cgit v1.2.3


From 3d2f30a1df907e3ef4175121f0d21456630a72aa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 18 Aug 2016 01:46:06 +0200
Subject: netfilter: nf_tables: add quota expression

This patch adds the quota expression. This new stateful expression
integrate easily into the dynset expression to build 'hashquota' flow
tables.

Arguably, we could use instead "counter bytes > 1000" instead, but this
approach has several problems:

1) We only support for one single stateful expression in dynamic set
   definitions, and the expression above is a composite of two
   expressions: get counter + comparison.

2) We would need to restore the packed counter representation (that we
   used to have) based on seqlock to synchronize this, since per-cpu is
   not suitable for this.

So instead of bloating the counter expression back with the seqlock
representation and extending the existing set infrastructure to make it
more complex for the composite described above, let's follow the more
simple approach of adding a quota expression that we can plug into our
existing infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  19 +++++
 net/netfilter/Kconfig                    |   6 ++
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_quota.c                | 121 +++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+)
 create mode 100644 net/netfilter/nft_quota.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6ce0a6dd0889..784fbf15ab3d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -900,6 +900,25 @@ enum nft_queue_attributes {
 #define NFT_QUEUE_FLAG_CPU_FANOUT	0x02 /* use current CPU (no hashing) */
 #define NFT_QUEUE_FLAG_MASK		0x03
 
+enum nft_quota_flags {
+	NFT_QUOTA_F_INV		= (1 << 0),
+};
+
+/**
+ * enum nft_quota_attributes - nf_tables quota expression netlink attributes
+ *
+ * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16)
+ * @NFTA_QUOTA_FLAGS: flags (NLA_U32)
+ */
+enum nft_quota_attributes {
+	NFTA_QUOTA_UNSPEC,
+	NFTA_QUOTA_BYTES,
+	NFTA_QUOTA_FLAGS,
+	NFTA_QUOTA_PAD,
+	__NFTA_QUOTA_MAX
+};
+#define NFTA_QUOTA_MAX		(__NFTA_QUOTA_MAX - 1)
+
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
  *
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9cfaa00c79b2..29a8078deafa 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -542,6 +542,12 @@ config NFT_QUEUE
 	  This is required if you intend to use the userspace queueing
 	  infrastructure (also known as NFQUEUE) from nftables.
 
+config NFT_QUOTA
+	tristate "Netfilter nf_tables quota module"
+	help
+	  This option adds the "quota" expression that you can use to match
+	  enforce bytes quotas.
+
 config NFT_REJECT
 	default m if NETFILTER_ADVANCED=n
 	tristate "Netfilter nf_tables reject support"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1106ccde215c..0fc42df19b8c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_NFT_CT)		+= nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
 obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
+obj-$(CONFIG_NFT_QUOTA)		+= nft_quota.o
 obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
 obj-$(CONFIG_NFT_REJECT_INET)	+= nft_reject_inet.o
 obj-$(CONFIG_NFT_SET_RBTREE)	+= nft_set_rbtree.o
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 000000000000..6eafbf987ed9
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+	u64		quota;
+	bool		invert;
+	atomic64_t	remain;
+};
+
+static inline long nft_quota(struct nft_quota *priv,
+			     const struct nft_pktinfo *pkt)
+{
+	return atomic64_sub_return(pkt->skb->len, &priv->remain);
+}
+
+static void nft_quota_eval(const struct nft_expr *expr,
+			   struct nft_regs *regs,
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+
+	if (nft_quota(priv, pkt) < 0 && !priv->invert)
+		regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+	[NFTA_QUOTA_BYTES]	= { .type = NLA_U64 },
+	[NFTA_QUOTA_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+	u32 flags = 0;
+	u64 quota;
+
+	if (!tb[NFTA_QUOTA_BYTES])
+		return -EINVAL;
+
+	quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+	if (quota > S64_MAX)
+		return -EOVERFLOW;
+
+	if (tb[NFTA_QUOTA_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+		if (flags & ~NFT_QUOTA_F_INV)
+			return -EINVAL;
+	}
+
+	priv->quota = quota;
+	priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+	atomic64_set(&priv->remain, quota);
+
+	return 0;
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_quota *priv = nft_expr_priv(expr);
+	u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+
+	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+			 NFTA_QUOTA_PAD) ||
+	    nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+	.type		= &nft_quota_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+	.eval		= nft_quota_eval,
+	.init		= nft_quota_init,
+	.dump		= nft_quota_dump,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+	.name		= "quota",
+	.ops		= &nft_quota_ops,
+	.policy		= nft_quota_policy,
+	.maxattr	= NFTA_QUOTA_MAX,
+	.flags		= NFT_EXPR_STATEFUL,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+        return nft_register_expr(&nft_quota_type);
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+        nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
-- 
cgit v1.2.3


From 91dbc6be0a62d3bcea98287734d593610aed507d Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Thu, 18 Aug 2016 12:13:13 +0200
Subject: netfilter: nf_tables: add number generator expression

This patch adds the numgen expression that allows us to generated
incremental and random numbers, this generator is bound to a upper limit
that is specified by userspace.

This expression is useful to distribute packets in a round-robin fashion
as well as randomly.

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  24 ++++
 net/netfilter/Kconfig                    |   6 +
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_numgen.c               | 192 +++++++++++++++++++++++++++++++
 4 files changed, 223 insertions(+)
 create mode 100644 net/netfilter/nft_numgen.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 784fbf15ab3d..8c9d6ff70ec0 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1121,4 +1121,28 @@ enum nft_trace_types {
 	__NFT_TRACETYPE_MAX
 };
 #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
+
+/**
+ * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
+ *
+ * @NFTA_NG_DREG: destination register (NLA_U32)
+ * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_TYPE: operation type (NLA_U32)
+ */
+enum nft_ng_attributes {
+	NFTA_NG_UNSPEC,
+	NFTA_NG_DREG,
+	NFTA_NG_UNTIL,
+	NFTA_NG_TYPE,
+	__NFTA_NG_MAX
+};
+#define NFTA_NG_MAX	(__NFTA_NG_MAX - 1)
+
+enum nft_ng_types {
+	NFT_NG_INCREMENTAL,
+	NFT_NG_RANDOM,
+	__NFT_NG_MAX
+};
+#define NFT_NG_MAX	(__NFT_NG_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 29a8078deafa..e8d56d9a4df2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@ config NFT_META
 	  This option adds the "meta" expression that you can use to match and
 	  to set packet metainformation such as the packet mark.
 
+config NFT_NUMGEN
+	tristate "Netfilter nf_tables number generator module"
+	help
+	  This option adds the number generator expression used to perform
+	  incremental counting and random numbers bound to a upper limit.
+
 config NFT_CT
 	depends on NF_CONNTRACK
 	tristate "Netfilter nf_tables conntrack module"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 0fc42df19b8c..0c8581100ac6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV)	+= nf_tables_netdev.o
 obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
 obj-$(CONFIG_NFT_META)		+= nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN)	+= nft_numgen.o
 obj-$(CONFIG_NFT_CT)		+= nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 000000000000..176e26d5bbd0
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+struct nft_ng_inc {
+	enum nft_registers      dreg:8;
+	u32			until;
+	atomic_t		counter;
+};
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_inc *priv = nft_expr_priv(expr);
+	u32 nval, oval;
+
+	do {
+		oval = atomic_read(&priv->counter);
+		nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+	memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32));
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+	[NFTA_NG_DREG]		= { .type = NLA_U32 },
+	[NFTA_NG_UNTIL]		= { .type = NLA_U32 },
+	[NFTA_NG_TYPE]		= { .type = NLA_U32 },
+};
+
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+	if (priv->until == 0)
+		return -ERANGE;
+
+	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+	atomic_set(&priv->counter, 0);
+
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+		       u32 until, enum nft_ng_types type)
+{
+	if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_NG_UNTIL, until))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_NG_TYPE, type))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+}
+
+struct nft_ng_random {
+	enum nft_registers      dreg:8;
+	u32			until;
+};
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_random *priv = nft_expr_priv(expr);
+	struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+
+	regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
+						  priv->until);
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_ng_random *priv = nft_expr_priv(expr);
+
+	priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+	if (priv->until == 0)
+		return -ERANGE;
+
+	prandom_init_once(&nft_numgen_prandom_state);
+
+	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+	return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+	.type		= &nft_ng_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+	.eval		= nft_ng_inc_eval,
+	.init		= nft_ng_inc_init,
+	.dump		= nft_ng_inc_dump,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+	.type		= &nft_ng_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+	.eval		= nft_ng_random_eval,
+	.init		= nft_ng_random_init,
+	.dump		= nft_ng_random_dump,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+	u32 type;
+
+	if (!tb[NFTA_NG_DREG]	||
+	    !tb[NFTA_NG_UNTIL]	||
+	    !tb[NFTA_NG_TYPE])
+		return ERR_PTR(-EINVAL);
+
+	type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+	switch (type) {
+	case NFT_NG_INCREMENTAL:
+		return &nft_ng_inc_ops;
+	case NFT_NG_RANDOM:
+		return &nft_ng_random_ops;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+	.name		= "numgen",
+	.select_ops	= &nft_ng_select_ops,
+	.policy		= nft_ng_policy,
+	.maxattr	= NFTA_NG_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+	return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+	nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
-- 
cgit v1.2.3


From 15f0d4f531d84015511dbdc2512e5a77c0173d49 Mon Sep 17 00:00:00 2001
From: Shreyas NC <shreyas.nc@intel.com>
Date: Fri, 12 Aug 2016 12:29:50 +0530
Subject: ASoC: uapi: Intel: Skylake: Define vendor specific tokens

With recent topology changes in alsa-lib, driver data for
modules can now be passed in topology conf file using tuples.

This patch defines vendor specific tokens to describe private
data with tuples.

The allowed token types are UUID, string, bool, byte, short and
word. These tokens will be referenced by the vendor tuples in
the conf file.

In the topology conf file, multiple data blocks can be defined
for a widget which can be either tuple vendor array or blob. So,
each data block will be preceded by a descriptor to identify
size and type of block. These descriptors will be token
value pairs.

Tokens for module_id and loadable flag are not defined as these
are read from the DSP FW manifest.

Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/Kbuild           |   1 +
 include/uapi/sound/snd_sst_tokens.h | 208 ++++++++++++++++++++++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 include/uapi/sound/snd_sst_tokens.h

(limited to 'include/uapi')

diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild
index 691984cb0b91..9578d8bdbf31 100644
--- a/include/uapi/sound/Kbuild
+++ b/include/uapi/sound/Kbuild
@@ -13,3 +13,4 @@ header-y += sb16_csp.h
 header-y += sfnt_info.h
 header-y += tlv.h
 header-y += usb_stream.h
+header-y += snd_sst_tokens.h
diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h
new file mode 100644
index 000000000000..f56a932736ca
--- /dev/null
+++ b/include/uapi/sound/snd_sst_tokens.h
@@ -0,0 +1,208 @@
+/*
+ * snd_sst_tokens.h - Intel SST tokens definition
+ *
+ * Copyright (C) 2016 Intel Corp
+ * Author: Shreyas NC <shreyas.nc@intel.com>
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __SND_SST_TOKENS_H__
+#define __SND_SST_TOKENS_H__
+
+/**
+ * %SKL_TKN_UUID:               Module UUID
+ *
+ * %SKL_TKN_U8_BLOCK_TYPE:      Type of the private data block.Can be:
+ *                              tuples, bytes, short and words
+ *
+ * %SKL_TKN_U8_IN_PIN_TYPE:     Input pin type,
+ *                              homogenous=0, heterogenous=1
+ *
+ * %SKL_TKN_U8_OUT_PIN_TYPE:    Output pin type,
+ *                              homogenous=0, heterogenous=1
+ * %SKL_TKN_U8_DYN_IN_PIN:      Configure Input pin dynamically
+ *                              if true
+ *
+ * %SKL_TKN_U8_DYN_OUT_PIN:     Configure Output pin dynamically
+ *                              if true
+ *
+ * %SKL_TKN_U8_IN_QUEUE_COUNT:  Store the number of Input pins
+ *
+ * %SKL_TKN_U8_OUT_QUEUE_COUNT: Store the number of Output pins
+ *
+ * %SKL_TKN_U8_TIME_SLOT:       TDM slot number
+ *
+ * %SKL_TKN_U8_CORE_ID:         Stores module affinity value.Can take
+ *                              the values:
+ *                              SKL_AFFINITY_CORE_0 = 0,
+ *                              SKL_AFFINITY_CORE_1,
+ *                              SKL_AFFINITY_CORE_MAX
+ *
+ * %SKL_TKN_U8_MOD_TYPE:        Module type value.
+ *
+ * %SKL_TKN_U8_CONN_TYPE:       Module connection type can be a FE,
+ *                              BE or NONE as defined :
+ *                              SKL_PIPE_CONN_TYPE_NONE = 0,
+ *                              SKL_PIPE_CONN_TYPE_FE = 1 (HOST_DMA)
+ *                              SKL_PIPE_CONN_TYPE_BE = 2 (LINK_DMA)
+ *
+ * %SKL_TKN_U8_DEV_TYPE:        Type of device to which the module is
+ *                              connected
+ *                              Can take the values:
+ *                              SKL_DEVICE_BT = 0x0,
+ *                              SKL_DEVICE_DMIC = 0x1,
+ *                              SKL_DEVICE_I2S = 0x2,
+ *                              SKL_DEVICE_SLIMBUS = 0x3,
+ *                              SKL_DEVICE_HDALINK = 0x4,
+ *                              SKL_DEVICE_HDAHOST = 0x5,
+ *                              SKL_DEVICE_NONE
+ *
+ * %SKL_TKN_U8_HW_CONN_TYPE:    Connection type of the HW to which the
+ *                              module is connected
+ *                              SKL_CONN_NONE = 0,
+ *                              SKL_CONN_SOURCE = 1,
+ *                              SKL_CONN_SINK = 2
+ *
+ * %SKL_TKN_U16_PIN_INST_ID:    Stores the pin instance id
+ *
+ * %SKL_TKN_U16_MOD_INST_ID:    Stores the mdule instance id
+ *
+ * %SKL_TKN_U32_MAX_MCPS:       Module max mcps value
+ *
+ * %SKL_TKN_U32_MEM_PAGES:      Module resource pages
+ *
+ * %SKL_TKN_U32_OBS:            Stores Output Buffer size
+ *
+ * %SKL_TKN_U32_IBS:            Stores input buffer size
+ *
+ * %SKL_TKN_U32_VBUS_ID:        Module VBUS_ID. PDM=0, SSP0=0,
+ *                              SSP1=1,SSP2=2,
+ *                              SSP3=3, SSP4=4,
+ *                              SSP5=5, SSP6=6,INVALID
+ *
+ * %SKL_TKN_U32_PARAMS_FIXUP:   Module Params fixup mask
+ * %SKL_TKN_U32_CONVERTER:      Module params converter mask
+ * %SKL_TKN_U32_PIPE_ID:        Stores the pipe id
+ *
+ * %SKL_TKN_U32_PIPE_CONN_TYPE: Type of the token to which the pipe is
+ *                              connected to. It can be
+ *                              SKL_PIPE_CONN_TYPE_NONE = 0,
+ *                              SKL_PIPE_CONN_TYPE_FE = 1 (HOST_DMA),
+ *                              SKL_PIPE_CONN_TYPE_BE = 2 (LINK_DMA),
+ *
+ * %SKL_TKN_U32_PIPE_PRIORITY:  Pipe priority value
+ * %SKL_TKN_U32_PIPE_MEM_PGS:   Pipe resource pages
+ *
+ * %SKL_TKN_U32_DIR_PIN_COUNT:  Value for the direction to set input/output
+ *                              formats and the pin count.
+ *                              The first 4 bits have the direction
+ *                              value and the next 4 have
+ *                              the pin count value.
+ *                              SKL_DIR_IN = 0, SKL_DIR_OUT = 1.
+ *                              The input and output formats
+ *                              share the same set of tokens
+ *                              with the distinction between input
+ *                              and output made by reading direction
+ *                              token.
+ *
+ * %SKL_TKN_U32_FMT_CH:         Supported channel count
+ *
+ * %SKL_TKN_U32_FMT_FREQ:       Supported frequency/sample rate
+ *
+ * %SKL_TKN_U32_FMT_BIT_DEPTH:  Supported container size
+ *
+ * %SKL_TKN_U32_FMT_SAMPLE_SIZE:Number of samples in the container
+ *
+ * %SKL_TKN_U32_FMT_CH_CONFIG:  Supported channel configurations for the
+ *                              input/output.
+ *
+ * %SKL_TKN_U32_FMT_INTERLEAVE: Interleaving style which can be per
+ *                              channel or per sample. The values can be :
+ *                              SKL_INTERLEAVING_PER_CHANNEL = 0,
+ *                              SKL_INTERLEAVING_PER_SAMPLE = 1,
+ *
+ * %SKL_TKN_U32_FMT_SAMPLE_TYPE:
+ *                              Specifies the sample type. Can take the
+ *                              values: SKL_SAMPLE_TYPE_INT_MSB = 0,
+ *                              SKL_SAMPLE_TYPE_INT_LSB = 1,
+ *                              SKL_SAMPLE_TYPE_INT_SIGNED = 2,
+ *                              SKL_SAMPLE_TYPE_INT_UNSIGNED = 3,
+ *                              SKL_SAMPLE_TYPE_FLOAT = 4
+ *
+ * %SKL_TKN_U32_CH_MAP:         Channel map values
+ * %SKL_TKN_U32_MOD_SET_PARAMS: It can take these values:
+ *                              SKL_PARAM_DEFAULT, SKL_PARAM_INIT,
+ *                              SKL_PARAM_SET, SKL_PARAM_BIND
+ *
+ * %SKL_TKN_U32_MOD_PARAM_ID:   ID of the module params
+ *
+ * %SKL_TKN_U32_CAPS_SET_PARAMS:
+ *                              Set params value
+ *
+ * %SKL_TKN_U32_CAPS_PARAMS_ID: Params ID
+ *
+ * %SKL_TKN_U32_CAPS_SIZE:      Caps size
+ *
+ * %SKL_TKN_U32_PROC_DOMAIN:    Specify processing domain
+ *
+ * module_id and loadable flags dont have tokens as these values will be
+ * read from the DSP FW manifest
+ */
+enum SKL_TKNS {
+	SKL_TKN_UUID = 1,
+	SKL_TKN_U8_NUM_BLOCKS,
+	SKL_TKN_U8_BLOCK_TYPE,
+	SKL_TKN_U8_IN_PIN_TYPE,
+	SKL_TKN_U8_OUT_PIN_TYPE,
+	SKL_TKN_U8_DYN_IN_PIN,
+	SKL_TKN_U8_DYN_OUT_PIN,
+	SKL_TKN_U8_IN_QUEUE_COUNT,
+	SKL_TKN_U8_OUT_QUEUE_COUNT,
+	SKL_TKN_U8_TIME_SLOT,
+	SKL_TKN_U8_CORE_ID,
+	SKL_TKN_U8_MOD_TYPE,
+	SKL_TKN_U8_CONN_TYPE,
+	SKL_TKN_U8_DEV_TYPE,
+	SKL_TKN_U8_HW_CONN_TYPE,
+	SKL_TKN_U16_MOD_INST_ID,
+	SKL_TKN_U16_BLOCK_SIZE,
+	SKL_TKN_U32_MAX_MCPS,
+	SKL_TKN_U32_MEM_PAGES,
+	SKL_TKN_U32_OBS,
+	SKL_TKN_U32_IBS,
+	SKL_TKN_U32_VBUS_ID,
+	SKL_TKN_U32_PARAMS_FIXUP,
+	SKL_TKN_U32_CONVERTER,
+	SKL_TKN_U32_PIPE_ID,
+	SKL_TKN_U32_PIPE_CONN_TYPE,
+	SKL_TKN_U32_PIPE_PRIORITY,
+	SKL_TKN_U32_PIPE_MEM_PGS,
+	SKL_TKN_U32_DIR_PIN_COUNT,
+	SKL_TKN_U32_FMT_CH,
+	SKL_TKN_U32_FMT_FREQ,
+	SKL_TKN_U32_FMT_BIT_DEPTH,
+	SKL_TKN_U32_FMT_SAMPLE_SIZE,
+	SKL_TKN_U32_FMT_CH_CONFIG,
+	SKL_TKN_U32_FMT_INTERLEAVE,
+	SKL_TKN_U32_FMT_SAMPLE_TYPE,
+	SKL_TKN_U32_FMT_CH_MAP,
+	SKL_TKN_U32_PIN_MOD_ID,
+	SKL_TKN_U32_PIN_INST_ID,
+	SKL_TKN_U32_MOD_SET_PARAMS,
+	SKL_TKN_U32_MOD_PARAM_ID,
+	SKL_TKN_U32_CAPS_SET_PARAMS,
+	SKL_TKN_U32_CAPS_PARAMS_ID,
+	SKL_TKN_U32_CAPS_SIZE,
+	SKL_TKN_U32_PROC_DOMAIN,
+	SKL_TKN_MAX = SKL_TKN_U32_PROC_DOMAIN,
+};
+
+#endif
-- 
cgit v1.2.3


From 83a59b6338c71425f3159fc9ab31380f237af733 Mon Sep 17 00:00:00 2001
From: Marek Olšák <marek.olsak@amd.com>
Date: Wed, 17 Aug 2016 23:58:58 +0200
Subject: drm/amdgpu: add AMDGPU_INFO_NUM_EVICTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For profiling.

v2: really bump the minor version

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++
 include/uapi/drm/amdgpu_drm.h           | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 58b1db82f589..f5c99a008717 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -53,9 +53,10 @@
  * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
  *           at the end of IBs.
  * - 3.3.0 - Add VM support for UVD on supported hardware.
+ * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	3
+#define KMS_DRIVER_MINOR	4
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d942654a1de0..4c8e68a82c47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -373,6 +373,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case AMDGPU_INFO_NUM_BYTES_MOVED:
 		ui64 = atomic64_read(&adev->num_bytes_moved);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+	case AMDGPU_INFO_NUM_EVICTIONS:
+		ui64 = atomic64_read(&adev->num_evictions);
+		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_VRAM_USAGE:
 		ui64 = atomic64_read(&adev->vram_usage);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5aef0b71079b..ae2845fdcb5f 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -485,6 +485,8 @@ struct drm_amdgpu_cs_chunk_data {
 #define AMDGPU_INFO_DEV_INFO			0x16
 /* visible vram usage */
 #define AMDGPU_INFO_VIS_VRAM_USAGE		0x17
+/* number of TTM buffer evictions */
+#define AMDGPU_INFO_NUM_EVICTIONS		0x18
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
-- 
cgit v1.2.3


From f918e1697b1a8f2f26a4813db053cfbcafc48046 Mon Sep 17 00:00:00 2001
From: Mengdong Lin <mengdong.lin@linux.intel.com>
Date: Fri, 19 Aug 2016 18:12:46 +0800
Subject: ASoC: topology: ABI - Add sig_bits to stream caps

Kernel struct snd_soc_pcm_stream, SoC PCM stream information, needs this
field. Although current topology users don't configure this, we define it
for future extension.

Signed-off-by: Mengdong Lin <mengdong.lin@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/asoc.h | 1 +
 sound/soc/soc-topology.c  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index f734bea9a032..33d00a4ce656 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -257,6 +257,7 @@ struct snd_soc_tplg_stream_caps {
 	__le32 period_size_max;	/* max period size bytes */
 	__le32 buffer_size_min;	/* min buffer size bytes */
 	__le32 buffer_size_max;	/* max buffer size bytes */
+	__le32 sig_bits;        /* number of bits of content */
 } __attribute__((packed));
 
 /*
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index a9e83a2dd91c..6b05047a4134 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1556,6 +1556,7 @@ static void set_stream_info(struct snd_soc_pcm_stream *stream,
 	stream->rate_min = caps->rate_min;
 	stream->rate_max = caps->rate_max;
 	stream->formats = caps->formats;
+	stream->sig_bits = caps->sig_bits;
 }
 
 static void set_dai_flags(struct snd_soc_dai_driver *dai_drv,
-- 
cgit v1.2.3


From 7ac5d7b1a1254ceb4be19ba93ef7a6ee4e7ac382 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Mon, 22 Aug 2016 20:32:22 +0200
Subject: HSI: hsi_char.h: use __u32 from linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compiler errors like:

linux/hsi/hsi_char.h:51:2: error: unknown type name ‘uint32_t’

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 include/uapi/linux/hsi/hsi_char.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/hsi/hsi_char.h b/include/uapi/linux/hsi/hsi_char.h
index 76160b4f455d..c00a463d55f9 100644
--- a/include/uapi/linux/hsi/hsi_char.h
+++ b/include/uapi/linux/hsi/hsi_char.h
@@ -20,10 +20,11 @@
  * 02110-1301 USA
  */
 
-
 #ifndef __HSI_CHAR_H
 #define __HSI_CHAR_H
 
+#include <linux/types.h>
+
 #define HSI_CHAR_MAGIC		'k'
 #define HSC_IOW(num, dtype)	_IOW(HSI_CHAR_MAGIC, num, dtype)
 #define HSC_IOR(num, dtype)	_IOR(HSI_CHAR_MAGIC, num, dtype)
@@ -48,16 +49,16 @@
 #define HSC_ARB_PRIO		1
 
 struct hsc_rx_config {
-	uint32_t mode;
-	uint32_t flow;
-	uint32_t channels;
+	__u32 mode;
+	__u32 flow;
+	__u32 channels;
 };
 
 struct hsc_tx_config {
-	uint32_t mode;
-	uint32_t channels;
-	uint32_t speed;
-	uint32_t arb_mode;
+	__u32 mode;
+	__u32 channels;
+	__u32 speed;
+	__u32 arb_mode;
 };
 
 #endif /* __HSI_CHAR_H */
-- 
cgit v1.2.3


From 541070cec4f9be18ce9fcc74ac5e1036965ceb63 Mon Sep 17 00:00:00 2001
From: Shreyas NC <shreyas.nc@intel.com>
Date: Tue, 23 Aug 2016 09:31:03 +0530
Subject: ASoC: Intel: Skylake: Parse manifest data

Topology manifest has lib names and lib count info. So,
define tokens to represent module private data and parse
these tokens to fill up the manifest structure in the driver
accordingly.

Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/snd_sst_tokens.h    |   8 +-
 sound/soc/intel/skylake/skl-topology.c | 194 ++++++++++++++++++++++++++++++++-
 2 files changed, 200 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h
index f56a932736ca..1ee2e943d66a 100644
--- a/include/uapi/sound/snd_sst_tokens.h
+++ b/include/uapi/sound/snd_sst_tokens.h
@@ -153,6 +153,10 @@
  *
  * %SKL_TKN_U32_PROC_DOMAIN:    Specify processing domain
  *
+ * %SKL_TKN_U32_LIB_COUNT:      Specifies the number of libraries
+ *
+ * %SKL_TKN_STR_LIB_NAME:       Specifies the library name
+ *
  * module_id and loadable flags dont have tokens as these values will be
  * read from the DSP FW manifest
  */
@@ -202,7 +206,9 @@ enum SKL_TKNS {
 	SKL_TKN_U32_CAPS_PARAMS_ID,
 	SKL_TKN_U32_CAPS_SIZE,
 	SKL_TKN_U32_PROC_DOMAIN,
-	SKL_TKN_MAX = SKL_TKN_U32_PROC_DOMAIN,
+	SKL_TKN_U32_LIB_COUNT,
+	SKL_TKN_STR_LIB_NAME,
+	SKL_TKN_MAX = SKL_TKN_STR_LIB_NAME,
 };
 
 #endif
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 000482e6e2f7..108ebb9ab329 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -2201,6 +2201,197 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
 	return 0;
 }
 
+static int skl_tplg_fill_str_mfest_tkn(struct device *dev,
+		struct snd_soc_tplg_vendor_string_elem *str_elem,
+		struct skl_dfw_manifest *minfo)
+{
+	int tkn_count = 0;
+	static int ref_count;
+
+	switch (str_elem->token) {
+	case SKL_TKN_STR_LIB_NAME:
+		if (ref_count > minfo->lib_count - 1) {
+			ref_count = 0;
+			return -EINVAL;
+		}
+
+		strncpy(minfo->lib[ref_count].name, str_elem->string,
+				ARRAY_SIZE(minfo->lib[ref_count].name));
+		ref_count++;
+		tkn_count++;
+		break;
+
+	default:
+		dev_err(dev, "Not a string token %d", str_elem->token);
+		break;
+	}
+
+	return tkn_count;
+}
+
+static int skl_tplg_get_str_tkn(struct device *dev,
+		struct snd_soc_tplg_vendor_array *array,
+		struct skl_dfw_manifest *minfo)
+{
+	int tkn_count = 0, ret;
+	struct snd_soc_tplg_vendor_string_elem *str_elem;
+
+	str_elem = (struct snd_soc_tplg_vendor_string_elem *)array->value;
+	while (tkn_count < array->num_elems) {
+		ret = skl_tplg_fill_str_mfest_tkn(dev, str_elem, minfo);
+		str_elem++;
+
+		if (ret < 0)
+			return ret;
+
+		tkn_count = tkn_count + ret;
+	}
+
+	return tkn_count;
+}
+
+static int skl_tplg_get_int_tkn(struct device *dev,
+		struct snd_soc_tplg_vendor_value_elem *tkn_elem,
+		struct skl_dfw_manifest *minfo)
+{
+	int tkn_count = 0;
+
+	switch (tkn_elem->token) {
+	case SKL_TKN_U32_LIB_COUNT:
+		minfo->lib_count = tkn_elem->value;
+		tkn_count++;
+		break;
+
+	default:
+		dev_err(dev, "Not a manifest token %d", tkn_elem->token);
+		return -EINVAL;
+	}
+
+	return tkn_count;
+}
+
+/*
+ * Fill the manifest structure by parsing the tokens based on the
+ * type.
+ */
+static int skl_tplg_get_manifest_tkn(struct device *dev,
+		char *pvt_data, struct skl_dfw_manifest *minfo,
+		int block_size)
+{
+	int tkn_count = 0, ret;
+	int off = 0, tuple_size = 0;
+	struct snd_soc_tplg_vendor_array *array;
+	struct snd_soc_tplg_vendor_value_elem *tkn_elem;
+
+	if (block_size <= 0)
+		return -EINVAL;
+
+	while (tuple_size < block_size) {
+		array = (struct snd_soc_tplg_vendor_array *)(pvt_data + off);
+		off += array->size;
+		switch (array->type) {
+		case SND_SOC_TPLG_TUPLE_TYPE_STRING:
+			ret = skl_tplg_get_str_tkn(dev, array, minfo);
+
+			if (ret < 0)
+				return ret;
+			tkn_count += ret;
+
+			tuple_size += tkn_count *
+				sizeof(struct snd_soc_tplg_vendor_string_elem);
+			continue;
+
+		case SND_SOC_TPLG_TUPLE_TYPE_UUID:
+			dev_warn(dev, "no uuid tokens for skl tplf manifest");
+			continue;
+
+		default:
+			tkn_elem = array->value;
+			tkn_count = 0;
+			break;
+		}
+
+		while (tkn_count <= array->num_elems - 1) {
+			ret = skl_tplg_get_int_tkn(dev,
+					tkn_elem, minfo);
+			if (ret < 0)
+				return ret;
+
+			tkn_count = tkn_count + ret;
+			tkn_elem++;
+			tuple_size += tkn_count *
+				sizeof(struct snd_soc_tplg_vendor_value_elem);
+			break;
+		}
+		tkn_count = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse manifest private data for tokens. The private data block is
+ * preceded by descriptors for type and size of data block.
+ */
+static int skl_tplg_get_manifest_data(struct snd_soc_tplg_manifest *manifest,
+			struct device *dev, struct skl_dfw_manifest *minfo)
+{
+	struct snd_soc_tplg_vendor_array *array;
+	int num_blocks, block_size = 0, block_type, off = 0;
+	char *data;
+	int ret;
+
+	/* Read the NUM_DATA_BLOCKS descriptor */
+	array = (struct snd_soc_tplg_vendor_array *)manifest->priv.data;
+	ret = skl_tplg_get_desc_blocks(dev, array);
+	if (ret < 0)
+		return ret;
+	num_blocks = ret;
+
+	off += array->size;
+	array = (struct snd_soc_tplg_vendor_array *)
+			(manifest->priv.data + off);
+
+	/* Read the BLOCK_TYPE and BLOCK_SIZE descriptor */
+	while (num_blocks > 0) {
+		ret = skl_tplg_get_desc_blocks(dev, array);
+
+		if (ret < 0)
+			return ret;
+		block_type = ret;
+		off += array->size;
+
+		array = (struct snd_soc_tplg_vendor_array *)
+			(manifest->priv.data + off);
+
+		ret = skl_tplg_get_desc_blocks(dev, array);
+
+		if (ret < 0)
+			return ret;
+		block_size = ret;
+		off += array->size;
+
+		array = (struct snd_soc_tplg_vendor_array *)
+			(manifest->priv.data + off);
+
+		data = (manifest->priv.data + off);
+
+		if (block_type == SKL_TYPE_TUPLE) {
+			ret = skl_tplg_get_manifest_tkn(dev, data, minfo,
+					block_size);
+
+			if (ret < 0)
+				return ret;
+
+			--num_blocks;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int skl_manifest_load(struct snd_soc_component *cmpnt,
 				struct snd_soc_tplg_manifest *manifest)
 {
@@ -2211,7 +2402,8 @@ static int skl_manifest_load(struct snd_soc_component *cmpnt,
 	int ret = 0;
 
 	minfo = &skl->skl_sst->manifest;
-	memcpy(minfo, manifest->priv.data, sizeof(struct skl_dfw_manifest));
+
+	skl_tplg_get_manifest_data(manifest, bus->dev, minfo);
 
 	if (minfo->lib_count > HDA_MAX_LIB) {
 		dev_err(bus->dev, "Exceeding max Library count. Got:%d\n",
-- 
cgit v1.2.3


From b43f95695124baba9f7c48e247fc8fd212a984d9 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Mon, 22 Aug 2016 11:37:36 +0200
Subject: netfilter: nf_tables: typo in trace attribute definition

Should be attributes, instead of attibutes, for consistency with other
definitions.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 8c9d6ff70ec0..8a63f22b365d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1090,7 +1090,7 @@ enum nft_gen_attributes {
  * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
  * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
  */
-enum nft_trace_attibutes {
+enum nft_trace_attributes {
 	NFTA_TRACE_UNSPEC,
 	NFTA_TRACE_TABLE,
 	NFTA_TRACE_CHAIN,
-- 
cgit v1.2.3


From b2fe22d0cf64708c50c26f11b3da8b79809c699b Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick@shmanahar.org>
Date: Mon, 18 Jul 2016 18:10:30 -0300
Subject: [media] v4l2-core: Add support for touch devices

Some touch controllers send out touch data in a similar way to a
greyscale frame grabber.

Add new device type VFL_TYPE_TOUCH:
- This uses a new device prefix v4l-touch for these devices, to stop
  generic capture software from treating them as webcams. Otherwise,
  touch is treated similarly to video capture.
- Add V4L2_INPUT_TYPE_TOUCH
- Add MEDIA_INTF_T_V4L_TOUCH
- Add V4L2_CAP_TOUCH to indicate device is a touch device

Add formats:
- V4L2_TCH_FMT_DELTA_TD16 for signed 16-bit touch deltas
- V4L2_TCH_FMT_DELTA_TD08 for signed 16-bit touch deltas
- V4L2_TCH_FMT_TU16 for unsigned 16-bit touch data
- V4L2_TCH_FMT_TU08 for unsigned 8-bit touch data

This support will be used by:
- Atmel maXTouch (atmel_mxt_ts)
- Synaptics RMI4.
- sur40

Signed-off-by: Nick Dyer <nick@shmanahar.org>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/media/kapi/v4l2-dev.rst |  1 +
 drivers/media/media-entity.c          |  2 ++
 drivers/media/v4l2-core/v4l2-dev.c    | 14 +++++++++++---
 drivers/media/v4l2-core/v4l2-ioctl.c  | 36 ++++++++++++++++++++++++++++++-----
 include/media/v4l2-dev.h              |  4 +++-
 include/uapi/linux/media.h            |  1 +
 include/uapi/linux/videodev2.h        |  9 +++++++++
 7 files changed, 58 insertions(+), 9 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/media/kapi/v4l2-dev.rst b/Documentation/media/kapi/v4l2-dev.rst
index d07258a362d1..5782be725334 100644
--- a/Documentation/media/kapi/v4l2-dev.rst
+++ b/Documentation/media/kapi/v4l2-dev.rst
@@ -200,6 +200,7 @@ types exist:
 - ``VFL_TYPE_VBI``: ``/dev/vbiX`` for vertical blank data (i.e. closed captions, teletext)
 - ``VFL_TYPE_RADIO``: ``/dev/radioX`` for radio tuners
 - ``VFL_TYPE_SDR``: ``/dev/swradioX`` for Software Defined Radio tuners
+- ``VFL_TYPE_TOUCH``: ``/dev/v4l-touchX`` for touch sensors
 
 The last argument gives you a certain amount of control over the device
 device node number used (i.e. the X in ``videoX``). Normally you will pass -1
diff --git a/drivers/media/media-entity.c b/drivers/media/media-entity.c
index d8a2299f0c2a..9014362e904d 100644
--- a/drivers/media/media-entity.c
+++ b/drivers/media/media-entity.c
@@ -65,6 +65,8 @@ static inline const char *intf_type(struct media_interface *intf)
 		return "v4l-subdev";
 	case MEDIA_INTF_T_V4L_SWRADIO:
 		return "v4l-swradio";
+	case MEDIA_INTF_T_V4L_TOUCH:
+		return "v4l-touch";
 	case MEDIA_INTF_T_ALSA_PCM_CAPTURE:
 		return "alsa-pcm-capture";
 	case MEDIA_INTF_T_ALSA_PCM_PLAYBACK:
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index e6da353b39bc..8be561ab2615 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -527,6 +527,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 	bool is_vbi = vdev->vfl_type == VFL_TYPE_VBI;
 	bool is_radio = vdev->vfl_type == VFL_TYPE_RADIO;
 	bool is_sdr = vdev->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vdev->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vdev->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vdev->vfl_dir != VFL_DIR_RX;
 
@@ -573,7 +574,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 	if (ops->vidioc_enum_freq_bands || ops->vidioc_g_tuner || ops->vidioc_g_modulator)
 		set_bit(_IOC_NR(VIDIOC_ENUM_FREQ_BANDS), valid_ioctls);
 
-	if (is_vid) {
+	if (is_vid || is_tch) {
 		/* video specific ioctls */
 		if ((is_rx && (ops->vidioc_enum_fmt_vid_cap ||
 			       ops->vidioc_enum_fmt_vid_cap_mplane ||
@@ -662,7 +663,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 			set_bit(_IOC_NR(VIDIOC_TRY_FMT), valid_ioctls);
 	}
 
-	if (is_vid || is_vbi || is_sdr) {
+	if (is_vid || is_vbi || is_sdr || is_tch) {
 		/* ioctls valid for video, vbi or sdr */
 		SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, vidioc_reqbufs);
 		SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf);
@@ -675,7 +676,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 		SET_VALID_IOCTL(ops, VIDIOC_STREAMOFF, vidioc_streamoff);
 	}
 
-	if (is_vid || is_vbi) {
+	if (is_vid || is_vbi || is_tch) {
 		/* ioctls valid for video or vbi */
 		if (ops->vidioc_s_std)
 			set_bit(_IOC_NR(VIDIOC_ENUMSTD), valid_ioctls);
@@ -751,6 +752,10 @@ static int video_register_media_controller(struct video_device *vdev, int type)
 		intf_type = MEDIA_INTF_T_V4L_SWRADIO;
 		vdev->entity.function = MEDIA_ENT_F_IO_SWRADIO;
 		break;
+	case VFL_TYPE_TOUCH:
+		intf_type = MEDIA_INTF_T_V4L_TOUCH;
+		vdev->entity.function = MEDIA_ENT_F_IO_V4L;
+		break;
 	case VFL_TYPE_RADIO:
 		intf_type = MEDIA_INTF_T_V4L_RADIO;
 		/*
@@ -854,6 +859,9 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
 		/* Use device name 'swradio' because 'sdr' was already taken. */
 		name_base = "swradio";
 		break;
+	case VFL_TYPE_TOUCH:
+		name_base = "v4l-touch";
+		break;
 	default:
 		printk(KERN_ERR "%s called with unknown type: %d\n",
 		       __func__, type);
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 51a0fa144392..eb6ccc70e9a8 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -924,6 +924,7 @@ static int check_fmt(struct file *file, enum v4l2_buf_type type)
 	bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER;
 	bool is_vbi = vfd->vfl_type == VFL_TYPE_VBI;
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 
@@ -932,7 +933,7 @@ static int check_fmt(struct file *file, enum v4l2_buf_type type)
 
 	switch (type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-		if (is_vid && is_rx &&
+		if ((is_vid || is_tch) && is_rx &&
 		    (ops->vidioc_g_fmt_vid_cap || ops->vidioc_g_fmt_vid_cap_mplane))
 			return 0;
 		break;
@@ -1243,6 +1244,10 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_SDR_FMT_CS8:		descr = "Complex S8"; break;
 	case V4L2_SDR_FMT_CS14LE:	descr = "Complex S14LE"; break;
 	case V4L2_SDR_FMT_RU12LE:	descr = "Real U12LE"; break;
+	case V4L2_TCH_FMT_DELTA_TD16:	descr = "16-bit signed deltas"; break;
+	case V4L2_TCH_FMT_DELTA_TD08:	descr = "8-bit signed deltas"; break;
+	case V4L2_TCH_FMT_TU16:		descr = "16-bit unsigned touch data"; break;
+	case V4L2_TCH_FMT_TU08:		descr = "8-bit unsigned touch data"; break;
 
 	default:
 		/* Compressed formats */
@@ -1309,13 +1314,14 @@ static int v4l_enum_fmt(const struct v4l2_ioctl_ops *ops,
 	struct video_device *vfd = video_devdata(file);
 	bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER;
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 	int ret = -EINVAL;
 
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-		if (unlikely(!is_rx || !is_vid || !ops->vidioc_enum_fmt_vid_cap))
+		if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_enum_fmt_vid_cap))
 			break;
 		ret = ops->vidioc_enum_fmt_vid_cap(file, fh, arg);
 		break;
@@ -1362,6 +1368,7 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 	struct video_device *vfd = video_devdata(file);
 	bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER;
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 	int ret;
@@ -1392,7 +1399,7 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-		if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap))
+		if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_g_fmt_vid_cap))
 			break;
 		p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
 		ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg);
@@ -1451,6 +1458,21 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 	return -EINVAL;
 }
 
+static void v4l_pix_format_touch(struct v4l2_pix_format *p)
+{
+	/*
+	 * The v4l2_pix_format structure contains fields that make no sense for
+	 * touch. Set them to default values in this case.
+	 */
+
+	p->field = V4L2_FIELD_NONE;
+	p->colorspace = V4L2_COLORSPACE_RAW;
+	p->flags = 0;
+	p->ycbcr_enc = 0;
+	p->quantization = 0;
+	p->xfer_func = 0;
+}
+
 static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
@@ -1458,6 +1480,7 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops,
 	struct video_device *vfd = video_devdata(file);
 	bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER;
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 	int ret;
@@ -1469,12 +1492,14 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops,
 
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-		if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_cap))
+		if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_s_fmt_vid_cap))
 			break;
 		CLEAR_AFTER_FIELD(p, fmt.pix);
 		ret = ops->vidioc_s_fmt_vid_cap(file, fh, arg);
 		/* just in case the driver zeroed it again */
 		p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
+		if (is_tch)
+			v4l_pix_format_touch(&p->fmt.pix);
 		return ret;
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
 		if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_cap_mplane))
@@ -1545,6 +1570,7 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops,
 	struct video_device *vfd = video_devdata(file);
 	bool is_vid = vfd->vfl_type == VFL_TYPE_GRABBER;
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
+	bool is_tch = vfd->vfl_type == VFL_TYPE_TOUCH;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 	int ret;
@@ -1553,7 +1579,7 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops,
 
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
-		if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_vid_cap))
+		if (unlikely(!is_rx || (!is_vid && !is_tch) || !ops->vidioc_try_fmt_vid_cap))
 			break;
 		CLEAR_AFTER_FIELD(p, fmt.pix);
 		ret = ops->vidioc_try_fmt_vid_cap(file, fh, arg);
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index a122b1bd40f9..5272aeec0cec 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -25,7 +25,8 @@
 #define VFL_TYPE_RADIO		2
 #define VFL_TYPE_SUBDEV		3
 #define VFL_TYPE_SDR		4
-#define VFL_TYPE_MAX		5
+#define VFL_TYPE_TOUCH		5
+#define VFL_TYPE_MAX		6
 
 /* Is this a receiver, transmitter or mem-to-mem? */
 /* Ignored for VFL_TYPE_SUBDEV. */
@@ -294,6 +295,7 @@ struct video_device
  *	- %VFL_TYPE_RADIO - A radio card
  *	- %VFL_TYPE_SUBDEV - A subdevice
  *	- %VFL_TYPE_SDR - Software Defined Radio
+ *	- %VFL_TYPE_TOUCH - A touch sensor
  *
  * .. note::
  *
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 7acf0f634f70..4890787731b8 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -307,6 +307,7 @@ struct media_links_enum {
 #define MEDIA_INTF_T_V4L_RADIO  (MEDIA_INTF_T_V4L_BASE + 2)
 #define MEDIA_INTF_T_V4L_SUBDEV (MEDIA_INTF_T_V4L_BASE + 3)
 #define MEDIA_INTF_T_V4L_SWRADIO (MEDIA_INTF_T_V4L_BASE + 4)
+#define MEDIA_INTF_T_V4L_TOUCH	(MEDIA_INTF_T_V4L_BASE + 5)
 
 #define MEDIA_INTF_T_ALSA_PCM_CAPTURE   (MEDIA_INTF_T_ALSA_BASE)
 #define MEDIA_INTF_T_ALSA_PCM_PLAYBACK  (MEDIA_INTF_T_ALSA_BASE + 1)
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 724f43e69d03..22bad8c741fe 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -440,6 +440,8 @@ struct v4l2_capability {
 #define V4L2_CAP_ASYNCIO                0x02000000  /* async I/O */
 #define V4L2_CAP_STREAMING              0x04000000  /* streaming I/O ioctls */
 
+#define V4L2_CAP_TOUCH                  0x10000000  /* Is a touch device */
+
 #define V4L2_CAP_DEVICE_CAPS            0x80000000  /* sets device capabilities field */
 
 /*
@@ -635,6 +637,12 @@ struct v4l2_pix_format {
 #define V4L2_SDR_FMT_CS14LE       v4l2_fourcc('C', 'S', '1', '4') /* complex s14le */
 #define V4L2_SDR_FMT_RU12LE       v4l2_fourcc('R', 'U', '1', '2') /* real u12le */
 
+/* Touch formats - used for Touch devices */
+#define V4L2_TCH_FMT_DELTA_TD16	v4l2_fourcc('T', 'D', '1', '6') /* 16-bit signed deltas */
+#define V4L2_TCH_FMT_DELTA_TD08	v4l2_fourcc('T', 'D', '0', '8') /* 8-bit signed deltas */
+#define V4L2_TCH_FMT_TU16	v4l2_fourcc('T', 'U', '1', '6') /* 16-bit unsigned touch data */
+#define V4L2_TCH_FMT_TU08	v4l2_fourcc('T', 'U', '0', '8') /* 8-bit unsigned touch data */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
 
@@ -1401,6 +1409,7 @@ struct v4l2_input {
 /*  Values for the 'type' field */
 #define V4L2_INPUT_TYPE_TUNER		1
 #define V4L2_INPUT_TYPE_CAMERA		2
+#define V4L2_INPUT_TYPE_TOUCH		3
 
 /* field 'status' - general */
 #define V4L2_IN_ST_NO_POWER    0x00000001  /* Attached device is off */
-- 
cgit v1.2.3


From cd34af40a09c678abad36304eb68e1774640e908 Mon Sep 17 00:00:00 2001
From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>
Date: Tue, 9 Aug 2016 18:39:52 -0500
Subject: scsi: cxlflash: Transition to application close model

Caching the adapter file descriptor and performing a close on behalf of
an application is a poor design. This is due to the fact that once a
file descriptor in installed, it is free to be altered without the
knowledge of the cxlflash driver. This can lead to inconsistencies
between the application and kernel. Furthermore, the nature of the
former design is more exploitable and thus should be abandoned.

To support applications performing a close on the adapter file that is
associated with a context, a new flag is introduced to the user API to
indicate to applications that they are responsible for the close
following the cleanup (detach) of a context. The documentation is also
updated to reflect this change in behavior.

Inspired-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Acked-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/powerpc/cxlflash.txt | 42 +++++++++++++++++++---
 drivers/scsi/cxlflash/superpipe.c  | 71 ++++++++++----------------------------
 drivers/scsi/cxlflash/vlun.c       | 13 ++-----
 include/uapi/scsi/cxlflash_ioctl.h | 19 +++++++---
 4 files changed, 73 insertions(+), 72 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/powerpc/cxlflash.txt b/Documentation/powerpc/cxlflash.txt
index 4202d1bc583c..f4c119092b98 100644
--- a/Documentation/powerpc/cxlflash.txt
+++ b/Documentation/powerpc/cxlflash.txt
@@ -171,11 +171,30 @@ DK_CXLFLASH_ATTACH
           destroyed, the tokens are to be considered stale and subsequent
           usage will result in errors.
 
+	- A valid adapter file descriptor (fd2 >= 0) is only returned on
+	  the initial attach for a context. Subsequent attaches to an
+	  existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
+	  do not provide the adapter file descriptor as it was previously
+	  made known to the application.
+
         - When a context is no longer needed, the user shall detach from
-          the context via the DK_CXLFLASH_DETACH ioctl.
+          the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
+	  returns with a valid adapter file descriptor and the return flag
+	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+	  close the adapter file descriptor following a successful detach.
+
+	- When this ioctl returns with a valid fd2 and the return flag
+	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+	  close fd2 in the following circumstances:
+
+		+ Following a successful detach of the last user of the context
+		+ Following a successful recovery on the context's original fd2
+		+ In the child process of a fork(), following a clone ioctl,
+		  on the fd2 associated with the source context
 
-        - A close on fd2 will invalidate the tokens. This operation is not
-          required by the user.
+        - At any time, a close on fd2 will invalidate the tokens. Applications
+	  should exercise caution to only close fd2 when appropriate (outlined
+	  in the previous bullet) to avoid premature loss of I/O.
 
 DK_CXLFLASH_USER_DIRECT
 -----------------------
@@ -254,6 +273,10 @@ DK_CXLFLASH_DETACH
     success, all "tokens" which had been provided to the user from the
     DK_CXLFLASH_ATTACH onward are no longer valid.
 
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ close the fd2 associated with the context
+    following the detach of the final user of the context.
+
 DK_CXLFLASH_VLUN_CLONE
 ----------------------
     This ioctl is responsible for cloning a previously created
@@ -261,7 +284,7 @@ DK_CXLFLASH_VLUN_CLONE
     support maintaining user space access to storage after a process
     forks. Upon success, the child process (which invoked the ioctl)
     will have access to the same LUNs via the same resource handle(s)
-    and fd2 as the parent, but under a different context.
+    as the parent, but under a different context.
 
     Context sharing across processes is not supported with CXL and
     therefore each fork must be met with establishing a new context
@@ -275,6 +298,12 @@ DK_CXLFLASH_VLUN_CLONE
     translation tables are copied from the parent context to the child's
     and then synced with the AFU.
 
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ close the fd2 associated with the source
+    context (still resident/accessible in the parent process) following the
+    clone. This is to avoid a stale entry in the file descriptor table of the
+    child process.
+
 DK_CXLFLASH_VERIFY
 ------------------
     This ioctl is used to detect various changes such as the capacity of
@@ -309,6 +338,11 @@ DK_CXLFLASH_RECOVER_AFU
     at which time the context/resources they held will be freed as part of
     the release fop.
 
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ unmap and close the fd2 associated with the
+    original context following this ioctl returning success and indicating that
+    the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
+
 DK_CXLFLASH_MANAGE_LUN
 ----------------------
     This ioctl is used to switch a LUN from a mode where it is available
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index be7522ae02a4..b3bb90df4e1f 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -825,7 +825,6 @@ static void remove_context(struct kref *kref)
 {
 	struct ctx_info *ctxi = container_of(kref, struct ctx_info, kref);
 	struct cxlflash_cfg *cfg = ctxi->cfg;
-	int lfd;
 	u64 ctxid = DECODE_CTXID(ctxi->ctxid);
 
 	/* Remove context from table/error list */
@@ -842,19 +841,7 @@ static void remove_context(struct kref *kref)
 	mutex_unlock(&ctxi->mutex);
 
 	/* Context now completely uncoupled/unreachable */
-	lfd = ctxi->lfd;
 	destroy_context(cfg, ctxi);
-
-	/*
-	 * As a last step, clean up external resources when not
-	 * already on an external cleanup thread, i.e.: close(adap_fd).
-	 *
-	 * NOTE: this will free up the context from the CXL services,
-	 * allowing it to dole out the same context_id on a future
-	 * (or even currently in-flight) disk_attach operation.
-	 */
-	if (lfd != -1)
-		sys_close(lfd);
 }
 
 /**
@@ -949,34 +936,18 @@ static int cxlflash_disk_detach(struct scsi_device *sdev,
  *
  * This routine is the release handler for the fops registered with
  * the CXL services on an initial attach for a context. It is called
- * when a close is performed on the adapter file descriptor returned
- * to the user. Programmatically, the user is not required to perform
- * the close, as it is handled internally via the detach ioctl when
- * a context is being removed. Note that nothing prevents the user
- * from performing a close, but the user should be aware that doing
- * so is considered catastrophic and subsequent usage of the superpipe
- * API with previously saved off tokens will fail.
+ * when a close (explicity by the user or as part of a process tear
+ * down) is performed on the adapter file descriptor returned to the
+ * user. The user should be aware that explicitly performing a close
+ * considered catastrophic and subsequent usage of the superpipe API
+ * with previously saved off tokens will fail.
  *
- * When initiated from an external close (either by the user or via
- * a process tear down), the routine derives the context reference
- * and calls detach for each LUN associated with the context. The
- * final detach operation will cause the context itself to be freed.
- * Note that the saved off lfd is reset prior to calling detach to
- * signify that the final detach should not perform a close.
- *
- * When initiated from a detach operation as part of the tear down
- * of a context, the context is first completely freed and then the
- * close is performed. This routine will fail to derive the context
- * reference (due to the context having already been freed) and then
- * call into the CXL release entry point.
- *
- * Thus, with exception to when the CXL process element (context id)
- * lookup fails (a case that should theoretically never occur), every
- * call into this routine results in a complete freeing of a context.
- *
- * As part of the detach, all per-context resources associated with the LUN
- * are cleaned up. When detaching the last LUN for a context, the context
- * itself is cleaned up and released.
+ * This routine derives the context reference and calls detach for
+ * each LUN associated with the context.The final detach operation
+ * causes the context itself to be freed. With exception to when the
+ * CXL process element (context id) lookup fails (a case that should
+ * theoretically never occur), every call into this routine results
+ * in a complete freeing of a context.
  *
  * Return: 0 on success
  */
@@ -1014,11 +985,8 @@ static int cxlflash_cxl_release(struct inode *inode, struct file *file)
 		goto out;
 	}
 
-	dev_dbg(dev, "%s: close(%d) for context %d\n",
-		__func__, ctxi->lfd, ctxid);
+	dev_dbg(dev, "%s: close for context %d\n", __func__, ctxid);
 
-	/* Reset the file descriptor to indicate we're on a close() thread */
-	ctxi->lfd = -1;
 	detach.context_id = ctxi->ctxid;
 	list_for_each_entry_safe(lun_access, t, &ctxi->luns, list)
 		_cxlflash_disk_detach(lun_access->sdev, ctxi, &detach);
@@ -1391,7 +1359,6 @@ static int cxlflash_disk_attach(struct scsi_device *sdev,
 			__func__, rctxid);
 		kref_get(&ctxi->kref);
 		list_add(&lun_access->list, &ctxi->luns);
-		fd = ctxi->lfd;
 		goto out_attach;
 	}
 
@@ -1461,7 +1428,11 @@ static int cxlflash_disk_attach(struct scsi_device *sdev,
 	fd_install(fd, file);
 
 out_attach:
-	attach->hdr.return_flags = 0;
+	if (fd != -1)
+		attach->hdr.return_flags = DK_CXLFLASH_APP_CLOSE_ADAP_FD;
+	else
+		attach->hdr.return_flags = 0;
+
 	attach->context_id = ctxi->ctxid;
 	attach->block_size = gli->blk_len;
 	attach->mmio_size = sizeof(afu->afu_map->hosts[0].harea);
@@ -1526,7 +1497,7 @@ static int recover_context(struct cxlflash_cfg *cfg, struct ctx_info *ctxi)
 {
 	struct device *dev = &cfg->dev->dev;
 	int rc = 0;
-	int old_fd, fd = -1;
+	int fd = -1;
 	int ctxid = -1;
 	struct file *file;
 	struct cxl_context *ctx;
@@ -1574,7 +1545,6 @@ static int recover_context(struct cxlflash_cfg *cfg, struct ctx_info *ctxi)
 	 * No error paths after this point. Once the fd is installed it's
 	 * visible to user space and can't be undone safely on this thread.
 	 */
-	old_fd = ctxi->lfd;
 	ctxi->ctxid = ENCODE_CTXID(ctxi, ctxid);
 	ctxi->lfd = fd;
 	ctxi->ctx = ctx;
@@ -1593,9 +1563,6 @@ static int recover_context(struct cxlflash_cfg *cfg, struct ctx_info *ctxi)
 	cfg->ctx_tbl[ctxid] = ctxi;
 	mutex_unlock(&cfg->ctx_tbl_list_mutex);
 	fd_install(fd, file);
-
-	/* Release the original adapter fd and associated CXL resources */
-	sys_close(old_fd);
 out:
 	dev_dbg(dev, "%s: returning ctxid=%d fd=%d rc=%d\n",
 		__func__, ctxid, fd, rc);
@@ -1707,7 +1674,7 @@ retry_recover:
 		recover->context_id = ctxi->ctxid;
 		recover->adap_fd = ctxi->lfd;
 		recover->mmio_size = sizeof(afu->afu_map->hosts[0].harea);
-		recover->hdr.return_flags |=
+		recover->hdr.return_flags = DK_CXLFLASH_APP_CLOSE_ADAP_FD |
 			DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET;
 		goto out;
 	}
diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c
index 50f8e9300770..90c5d7f5278e 100644
--- a/drivers/scsi/cxlflash/vlun.c
+++ b/drivers/scsi/cxlflash/vlun.c
@@ -1135,14 +1135,13 @@ int cxlflash_disk_clone(struct scsi_device *sdev,
 	    ctxid_dst = DECODE_CTXID(clone->context_id_dst),
 	    rctxid_src = clone->context_id_src,
 	    rctxid_dst = clone->context_id_dst;
-	int adap_fd_src = clone->adap_fd_src;
 	int i, j;
 	int rc = 0;
 	bool found;
 	LIST_HEAD(sidecar);
 
-	pr_debug("%s: ctxid_src=%llu ctxid_dst=%llu adap_fd_src=%d\n",
-		 __func__, ctxid_src, ctxid_dst, adap_fd_src);
+	pr_debug("%s: ctxid_src=%llu ctxid_dst=%llu\n",
+		 __func__, ctxid_src, ctxid_dst);
 
 	/* Do not clone yourself */
 	if (unlikely(rctxid_src == rctxid_dst)) {
@@ -1166,13 +1165,6 @@ int cxlflash_disk_clone(struct scsi_device *sdev,
 		goto out;
 	}
 
-	if (unlikely(adap_fd_src != ctxi_src->lfd)) {
-		pr_debug("%s: Invalid source adapter fd! (%d)\n",
-			 __func__, adap_fd_src);
-		rc = -EINVAL;
-		goto out;
-	}
-
 	/* Verify there is no open resource handle in the destination context */
 	for (i = 0; i < MAX_RHT_PER_CONTEXT; i++)
 		if (ctxi_dst->rht_start[i].nmask != 0) {
@@ -1257,7 +1249,6 @@ int cxlflash_disk_clone(struct scsi_device *sdev,
 
 out_success:
 	list_splice(&sidecar, &ctxi_dst->luns);
-	sys_close(adap_fd_src);
 
 	/* fall through */
 out:
diff --git a/include/uapi/scsi/cxlflash_ioctl.h b/include/uapi/scsi/cxlflash_ioctl.h
index 2302f3ce5f86..6bf1f8a022b1 100644
--- a/include/uapi/scsi/cxlflash_ioctl.h
+++ b/include/uapi/scsi/cxlflash_ioctl.h
@@ -39,19 +39,28 @@ struct dk_cxlflash_hdr {
  * at this time, this provides future flexibility.
  */
 #define DK_CXLFLASH_ALL_PORTS_ACTIVE	0x0000000000000001ULL
+#define DK_CXLFLASH_APP_CLOSE_ADAP_FD	0x0000000000000002ULL
 
 /*
- * Notes:
- * -----
+ * General Notes:
+ * -------------
  * The 'context_id' field of all ioctl structures contains the context
  * identifier for a context in the lower 32-bits (upper 32-bits are not
  * to be used when identifying a context to the AFU). That said, the value
  * in its entirety (all 64-bits) is to be treated as an opaque cookie and
  * should be presented as such when issuing ioctls.
+ */
+
+/*
+ * DK_CXLFLASH_ATTACH Notes:
+ * ------------------------
+ * Read/write access permissions are specified via the O_RDONLY, O_WRONLY,
+ * and O_RDWR flags defined in the fcntl.h header file.
  *
- * For DK_CXLFLASH_ATTACH ioctl, user specifies read/write access
- * permissions via the O_RDONLY, O_WRONLY, and O_RDWR flags defined in
- * the fcntl.h header file.
+ * A valid adapter file descriptor (fd >= 0) is only returned on the initial
+ * attach (successful) of a context. When a context is shared(reused), the user
+ * is expected to already 'know' the adapter file descriptor associated with the
+ * context.
  */
 #define DK_CXLFLASH_ATTACH_REUSE_CONTEXT	0x8000000000000000ULL
 
-- 
cgit v1.2.3


From 3bc52c45bac26bf7ed1dc8d287ad1aeaed1250b6 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 24 Jul 2016 21:55:45 -0700
Subject: dax: define a unified inode/address_space for device-dax mappings

In support of enabling resize / truncate of device-dax instances, define
a pseudo-fs to provide a unified inode/address space for vm operations.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/dax.c          | 150 +++++++++++++++++++++++++++++++++++++++++++--
 fs/char_dev.c              |   1 +
 include/uapi/linux/magic.h |   1 +
 3 files changed, 148 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 17715773c097..e8b9319aeadb 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -13,7 +13,9 @@
 #include <linux/pagemap.h>
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/mount.h>
 #include <linux/pfn_t.h>
+#include <linux/hash.h>
 #include <linux/cdev.h>
 #include <linux/slab.h>
 #include <linux/dax.h>
@@ -26,6 +28,9 @@ static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
 module_param(nr_dax, int, S_IRUGO);
+static struct vfsmount *dax_mnt;
+static struct kmem_cache *dax_cache __read_mostly;
+static struct super_block *dax_superblock __read_mostly;
 MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");
 
 /**
@@ -61,6 +66,7 @@ struct dax_region {
  */
 struct dax_dev {
 	struct dax_region *region;
+	struct inode *inode;
 	struct device dev;
 	struct cdev cdev;
 	bool alive;
@@ -69,6 +75,117 @@ struct dax_dev {
 	struct resource res[0];
 };
 
+static struct inode *dax_alloc_inode(struct super_block *sb)
+{
+	return kmem_cache_alloc(dax_cache, GFP_KERNEL);
+}
+
+static void dax_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	kmem_cache_free(dax_cache, inode);
+}
+
+static void dax_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, dax_i_callback);
+}
+
+static const struct super_operations dax_sops = {
+	.statfs = simple_statfs,
+	.alloc_inode = dax_alloc_inode,
+	.destroy_inode = dax_destroy_inode,
+	.drop_inode = generic_delete_inode,
+};
+
+static struct dentry *dax_mount(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *data)
+{
+	return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
+}
+
+static struct file_system_type dax_type = {
+	.name = "dax",
+	.mount = dax_mount,
+	.kill_sb = kill_anon_super,
+};
+
+static int dax_test(struct inode *inode, void *data)
+{
+	return inode->i_cdev == data;
+}
+
+static int dax_set(struct inode *inode, void *data)
+{
+	inode->i_cdev = data;
+	return 0;
+}
+
+static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt)
+{
+	struct inode *inode;
+
+	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
+			dax_test, dax_set, cdev);
+
+	if (!inode)
+		return NULL;
+
+	if (inode->i_state & I_NEW) {
+		inode->i_mode = S_IFCHR;
+		inode->i_flags = S_DAX;
+		inode->i_rdev = devt;
+		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+		unlock_new_inode(inode);
+	}
+	return inode;
+}
+
+static void init_once(void *inode)
+{
+	inode_init_once(inode);
+}
+
+static int dax_inode_init(void)
+{
+	int rc;
+
+	dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0,
+			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+			init_once);
+	if (!dax_cache)
+		return -ENOMEM;
+
+	rc = register_filesystem(&dax_type);
+	if (rc)
+		goto err_register_fs;
+
+	dax_mnt = kern_mount(&dax_type);
+	if (IS_ERR(dax_mnt)) {
+		rc = PTR_ERR(dax_mnt);
+		goto err_mount;
+	}
+	dax_superblock = dax_mnt->mnt_sb;
+
+	return 0;
+
+ err_mount:
+	unregister_filesystem(&dax_type);
+ err_register_fs:
+	kmem_cache_destroy(dax_cache);
+
+	return rc;
+}
+
+static void dax_inode_exit(void)
+{
+	kern_unmount(dax_mnt);
+	unregister_filesystem(&dax_type);
+	kmem_cache_destroy(dax_cache);
+}
+
 static void dax_region_free(struct kref *kref)
 {
 	struct dax_region *dax_region;
@@ -379,6 +496,9 @@ static int dax_open(struct inode *inode, struct file *filp)
 
 	dax_dev = container_of(inode->i_cdev, struct dax_dev, cdev);
 	dev_dbg(&dax_dev->dev, "%s\n", __func__);
+	inode->i_mapping = dax_dev->inode->i_mapping;
+	inode->i_mapping->host = dax_dev->inode;
+	filp->f_mapping = inode->i_mapping;
 	filp->private_data = dax_dev;
 	inode->i_flags = S_DAX;
 
@@ -410,6 +530,7 @@ static void dax_dev_release(struct device *dev)
 	ida_simple_remove(&dax_region->ida, dax_dev->id);
 	ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
 	dax_region_put(dax_region);
+	iput(dax_dev->inode);
 	kfree(dax_dev);
 }
 
@@ -459,6 +580,12 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
 		goto err_minor;
 	}
 
+	dax_dev->inode = dax_inode_get(&dax_dev->cdev, dev_t);
+	if (!dax_dev->inode) {
+		rc = -ENOMEM;
+		goto err_inode;
+	}
+
 	/* device_initialize() so cdev can reference kobj parent */
 	dev_t = MKDEV(MAJOR(dax_devt), minor);
 	dev = &dax_dev->dev;
@@ -494,6 +621,8 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
 	return devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
 
  err_cdev:
+	iput(dax_dev->inode);
+ err_inode:
 	ida_simple_remove(&dax_minor_ida, minor);
  err_minor:
 	ida_simple_remove(&dax_region->ida, dax_dev->id);
@@ -508,16 +637,28 @@ static int __init dax_init(void)
 {
 	int rc;
 
+	rc = dax_inode_init();
+	if (rc)
+		return rc;
+
 	nr_dax = max(nr_dax, 256);
 	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
 	if (rc)
-		return rc;
+		goto err_chrdev;
 
 	dax_class = class_create(THIS_MODULE, "dax");
-	if (IS_ERR(dax_class))
-		unregister_chrdev_region(dax_devt, nr_dax);
+	if (IS_ERR(dax_class)) {
+		rc = PTR_ERR(dax_class);
+		goto err_class;
+	}
 
-	return PTR_ERR_OR_ZERO(dax_class);
+	return 0;
+
+ err_class:
+	unregister_chrdev_region(dax_devt, nr_dax);
+ err_chrdev:
+	dax_inode_exit();
+	return rc;
 }
 
 static void __exit dax_exit(void)
@@ -525,6 +666,7 @@ static void __exit dax_exit(void)
 	class_destroy(dax_class);
 	unregister_chrdev_region(dax_devt, nr_dax);
 	ida_destroy(&dax_minor_ida);
+	dax_inode_exit();
 }
 
 MODULE_AUTHOR("Intel Corporation");
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 6edd825231c5..44a240c4bb65 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -406,6 +406,7 @@ void cd_forget(struct inode *inode)
 	spin_lock(&cdev_lock);
 	list_del_init(&inode->i_devices);
 	inode->i_cdev = NULL;
+	inode->i_mapping = &inode->i_data;
 	spin_unlock(&cdev_lock);
 }
 
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index e398beac67b8..9bd559472c92 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -65,6 +65,7 @@
 #define V9FS_MAGIC		0x01021997
 
 #define BDEVFS_MAGIC            0x62646576
+#define DAXFS_MAGIC             0x64646178
 #define BINFMTFS_MAGIC          0x42494e4d
 #define DEVPTS_SUPER_MAGIC	0x1cd1
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
-- 
cgit v1.2.3


From 7e0739cd9c40752fc9d31cac86e3a31d5537be18 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 4 Aug 2016 05:51:12 -0300
Subject: [media] videodev2.h: fix sYCC/AdobeYCC default quantization range

The default quantization range of the Y'CbCr encodings of sRGB
and AdobeRGB are full range instead of limited range according to
the corresponding standards.

Fix this in the V4L2_MAP_QUANTIZATION_DEFAULT macro.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/v4l/pixfmt-007.rst | 4 ++--
 include/uapi/linux/videodev2.h              | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/media/uapi/v4l/pixfmt-007.rst b/Documentation/media/uapi/v4l/pixfmt-007.rst
index f097268ffc54..c45d8794e8eb 100644
--- a/Documentation/media/uapi/v4l/pixfmt-007.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-007.rst
@@ -367,7 +367,7 @@ The :ref:`adobergb` standard defines the colorspace used by computer
 graphics that use the AdobeRGB colorspace. This is also known as the
 :ref:`oprgb` standard. The default transfer function is
 ``V4L2_XFER_FUNC_ADOBERGB``. The default Y'CbCr encoding is
-``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited
+``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is full
 range. The chromaticities of the primary colors and the white reference
 are:
 
@@ -448,7 +448,7 @@ the following ``V4L2_YCBCR_ENC_601`` encoding:
 
 Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range
 [-0.5…0.5]. This transform is identical to one defined in SMPTE
-170M/BT.601. The Y'CbCr quantization is limited range.
+170M/BT.601. The Y'CbCr quantization is full range.
 
 
 .. _col-bt2020:
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 22bad8c741fe..0ec15222a6ca 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -345,8 +345,8 @@ enum v4l2_quantization {
 	/*
 	 * The default for R'G'B' quantization is always full range, except
 	 * for the BT2020 colorspace. For Y'CbCr the quantization is always
-	 * limited range, except for COLORSPACE_JPEG, SYCC, XV601 or XV709:
-	 * those are full range.
+	 * limited range, except for COLORSPACE_JPEG, SRGB, ADOBERGB,
+	 * XV601 or XV709: those are full range.
 	 */
 	V4L2_QUANTIZATION_DEFAULT     = 0,
 	V4L2_QUANTIZATION_FULL_RANGE  = 1,
@@ -361,7 +361,8 @@ enum v4l2_quantization {
 #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb, colsp, ycbcr_enc) \
 	(((is_rgb) && (colsp) == V4L2_COLORSPACE_BT2020) ? V4L2_QUANTIZATION_LIM_RANGE : \
 	 (((is_rgb) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \
-	  (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \
+	  (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) || \
+	  (colsp) == V4L2_COLORSPACE_ADOBERGB || (colsp) == V4L2_COLORSPACE_SRGB ? \
 	 V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE))
 
 enum v4l2_priority {
-- 
cgit v1.2.3


From adca8c8e251fdbdb6b28e3ef7a5ca24b597b82d8 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 4 Aug 2016 06:01:48 -0300
Subject: [media] videodev2.h: put V4L2_YCBCR_ENC_SYCC under #ifndef __KERNEL__

This define is a duplicate of V4L2_YCBCR_ENC_601. So mark it as
'should not be used' and put it under #ifndef __KERNEL__ to
prevent drivers from referring to it.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/uapi/linux/videodev2.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 0ec15222a6ca..5529741202b5 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -292,13 +292,11 @@ enum v4l2_ycbcr_encoding {
 	 * various colorspaces:
 	 *
 	 * V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_470_SYSTEM_M,
-	 * V4L2_COLORSPACE_470_SYSTEM_BG, V4L2_COLORSPACE_ADOBERGB and
-	 * V4L2_COLORSPACE_JPEG: V4L2_YCBCR_ENC_601
+	 * V4L2_COLORSPACE_470_SYSTEM_BG, V4L2_COLORSPACE_SRGB,
+	 * V4L2_COLORSPACE_ADOBERGB and V4L2_COLORSPACE_JPEG: V4L2_YCBCR_ENC_601
 	 *
 	 * V4L2_COLORSPACE_REC709 and V4L2_COLORSPACE_DCI_P3: V4L2_YCBCR_ENC_709
 	 *
-	 * V4L2_COLORSPACE_SRGB: V4L2_YCBCR_ENC_SYCC
-	 *
 	 * V4L2_COLORSPACE_BT2020: V4L2_YCBCR_ENC_BT2020
 	 *
 	 * V4L2_COLORSPACE_SMPTE240M: V4L2_YCBCR_ENC_SMPTE240M
@@ -317,8 +315,14 @@ enum v4l2_ycbcr_encoding {
 	/* Rec. 709/EN 61966-2-4 Extended Gamut -- HDTV */
 	V4L2_YCBCR_ENC_XV709          = 4,
 
-	/* sYCC (Y'CbCr encoding of sRGB) */
+#ifndef __KERNEL__
+	/*
+	 * sYCC (Y'CbCr encoding of sRGB), identical to ENC_601. It was added
+	 * originally due to a misunderstanding of the sYCC standard. It should
+	 * not be used, instead use V4L2_YCBCR_ENC_601.
+	 */
 	V4L2_YCBCR_ENC_SYCC           = 5,
+#endif
 
 	/* BT.2020 Non-constant Luminance Y'CbCr */
 	V4L2_YCBCR_ENC_BT2020         = 6,
-- 
cgit v1.2.3


From a52e95abf772b43c9226e9a72d3c1353903ba96f Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 24 Aug 2016 15:46:26 +0900
Subject: net: diag: allow socket bytecode filters to match socket marks

This allows a privileged process to filter by socket mark when
dumping sockets via INET_DIAG_BY_FAMILY. This is useful on
systems that use mark-based routing such as Android.

The ability to filter socket marks requires CAP_NET_ADMIN, which
is consistent with other privileged operations allowed by the
SOCK_DIAG interface such as the ability to destroy sockets and
the ability to inspect BPF filters attached to packet sockets.

Tested: https://android-review.googlesource.com/261350
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h |  6 ++++++
 net/ipv4/inet_diag.c           | 36 +++++++++++++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index abbd1dc5d683..5581206a08ae 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -73,6 +73,7 @@ enum {
 	INET_DIAG_BC_S_COND,
 	INET_DIAG_BC_D_COND,
 	INET_DIAG_BC_DEV_COND,   /* u32 ifindex */
+	INET_DIAG_BC_MARK_COND,
 };
 
 struct inet_diag_hostcond {
@@ -82,6 +83,11 @@ struct inet_diag_hostcond {
 	__be32	addr[0];
 };
 
+struct inet_diag_markcond {
+	__u32 mark;
+	__u32 mask;
+};
+
 /* Base info structure. It contains socket identity (addrs/ports/cookie)
  * and, alas, the information shown by netstat. */
 struct inet_diag_msg {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a89b68c6a934..abfbe492ebfe 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -45,6 +45,7 @@ struct inet_diag_entry {
 	u16 family;
 	u16 userlocks;
 	u32 ifindex;
+	u32 mark;
 };
 
 static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -580,6 +581,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
 				yes = 0;
 			break;
 		}
+		case INET_DIAG_BC_MARK_COND: {
+			struct inet_diag_markcond *cond;
+
+			cond = (struct inet_diag_markcond *)(op + 1);
+			if ((entry->mark & cond->mask) != cond->mark)
+				yes = 0;
+			break;
+		}
 		}
 
 		if (yes) {
@@ -624,6 +633,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 	entry.dport = ntohs(inet->inet_dport);
 	entry.ifindex = sk->sk_bound_dev_if;
 	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
+	if (sk_fullsock(sk))
+		entry.mark = sk->sk_mark;
+	else if (sk->sk_state == TCP_NEW_SYN_RECV)
+		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+	else
+		entry.mark = 0;
 
 	return inet_diag_bc_run(bc, &entry);
 }
@@ -706,8 +721,17 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op,
 	return true;
 }
 
-static int inet_diag_bc_audit(const struct nlattr *attr)
+static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
+			   int *min_len)
 {
+	*min_len += sizeof(struct inet_diag_markcond);
+	return len >= *min_len;
+}
+
+static int inet_diag_bc_audit(const struct nlattr *attr,
+			      const struct sk_buff *skb)
+{
+	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
 	const void *bytecode, *bc;
 	int bytecode_len, len;
 
@@ -738,6 +762,12 @@ static int inet_diag_bc_audit(const struct nlattr *attr)
 			if (!valid_port_comparison(bc, len, &min_len))
 				return -EINVAL;
 			break;
+		case INET_DIAG_BC_MARK_COND:
+			if (!net_admin)
+				return -EPERM;
+			if (!valid_markcond(bc, len, &min_len))
+				return -EINVAL;
+			break;
 		case INET_DIAG_BC_AUTO:
 		case INET_DIAG_BC_JMP:
 		case INET_DIAG_BC_NOP:
@@ -1030,7 +1060,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 			attr = nlmsg_find_attr(nlh, hdrlen,
 					       INET_DIAG_REQ_BYTECODE);
-			err = inet_diag_bc_audit(attr);
+			err = inet_diag_bc_audit(attr, skb);
 			if (err)
 				return err;
 		}
@@ -1061,7 +1091,7 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 
 			attr = nlmsg_find_attr(h, hdrlen,
 					       INET_DIAG_REQ_BYTECODE);
-			err = inet_diag_bc_audit(attr);
+			err = inet_diag_bc_audit(attr, skb);
 			if (err)
 				return err;
 		}
-- 
cgit v1.2.3


From 54dfce6d07b0391e23d006579bba488de4f7d6aa Mon Sep 17 00:00:00 2001
From: Felix Hädicke <felixhaedicke@web.de>
Date: Wed, 22 Jun 2016 01:12:07 +0200
Subject: usb: gadget: f_fs: handle control requests not directed to interface
 or endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a new FunctionFS descriptor flag named
FUNCTIONFS_ALL_CTRL_RECIP. When this flag is enabled, control requests,
which are not explicitly directed to an interface or endpoint, can be
handled.

This allows FunctionFS userspace drivers to process non-standard
control requests.

Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c  | 34 ++++++++++++++++++++++++++++++----
 include/uapi/linux/usb/functionfs.h |  1 +
 2 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 5c8429f23a89..d7acab873836 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -98,6 +98,8 @@ static int ffs_func_set_alt(struct usb_function *, unsigned, unsigned);
 static void ffs_func_disable(struct usb_function *);
 static int ffs_func_setup(struct usb_function *,
 			  const struct usb_ctrlrequest *);
+static bool ffs_func_req_match(struct usb_function *,
+			       const struct usb_ctrlrequest *);
 static void ffs_func_suspend(struct usb_function *);
 static void ffs_func_resume(struct usb_function *);
 
@@ -2243,7 +2245,8 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 			      FUNCTIONFS_HAS_SS_DESC |
 			      FUNCTIONFS_HAS_MS_OS_DESC |
 			      FUNCTIONFS_VIRTUAL_ADDR |
-			      FUNCTIONFS_EVENTFD)) {
+			      FUNCTIONFS_EVENTFD |
+			      FUNCTIONFS_ALL_CTRL_RECIP)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -3094,8 +3097,9 @@ static int ffs_func_setup(struct usb_function *f,
 	 * handle them.  All other either handled by composite or
 	 * passed to usb_configuration->setup() (if one is set).  No
 	 * matter, we will handle requests directed to endpoint here
-	 * as well (as it's straightforward) but what to do with any
-	 * other request?
+	 * as well (as it's straightforward).  Other request recipient
+	 * types are only handled when the user flag FUNCTIONFS_ALL_CTRL_RECIP
+	 * is being used.
 	 */
 	if (ffs->state != FFS_ACTIVE)
 		return -ENODEV;
@@ -3116,7 +3120,10 @@ static int ffs_func_setup(struct usb_function *f,
 		break;
 
 	default:
-		return -EOPNOTSUPP;
+		if (func->ffs->user_flags & FUNCTIONFS_ALL_CTRL_RECIP)
+			ret = le16_to_cpu(creq->wIndex);
+		else
+			return -EOPNOTSUPP;
 	}
 
 	spin_lock_irqsave(&ffs->ev.waitq.lock, flags);
@@ -3128,6 +3135,24 @@ static int ffs_func_setup(struct usb_function *f,
 	return 0;
 }
 
+static bool ffs_func_req_match(struct usb_function *f,
+			       const struct usb_ctrlrequest *creq)
+{
+	struct ffs_function *func = ffs_func_from_usb(f);
+
+	switch (creq->bRequestType & USB_RECIP_MASK) {
+	case USB_RECIP_INTERFACE:
+		return ffs_func_revmap_intf(func,
+					    le16_to_cpu(creq->wIndex) >= 0);
+	case USB_RECIP_ENDPOINT:
+		return ffs_func_revmap_ep(func,
+					  le16_to_cpu(creq->wIndex) >= 0);
+	default:
+		return (bool) (func->ffs->user_flags &
+			       FUNCTIONFS_ALL_CTRL_RECIP);
+	}
+}
+
 static void ffs_func_suspend(struct usb_function *f)
 {
 	ENTER();
@@ -3378,6 +3403,7 @@ static struct usb_function *ffs_alloc(struct usb_function_instance *fi)
 	func->function.set_alt = ffs_func_set_alt;
 	func->function.disable = ffs_func_disable;
 	func->function.setup   = ffs_func_setup;
+	func->function.req_match = ffs_func_req_match;
 	func->function.suspend = ffs_func_suspend;
 	func->function.resume  = ffs_func_resume;
 	func->function.free_func = ffs_free;
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 108dd7997014..93da4ca82dc7 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -21,6 +21,7 @@ enum functionfs_flags {
 	FUNCTIONFS_HAS_MS_OS_DESC = 8,
 	FUNCTIONFS_VIRTUAL_ADDR = 16,
 	FUNCTIONFS_EVENTFD = 32,
+	FUNCTIONFS_ALL_CTRL_RECIP = 64,
 };
 
 /* Descriptor of an non-audio endpoint */
-- 
cgit v1.2.3


From 4368c28ae7acb0744968e58c81be561b44aacd57 Mon Sep 17 00:00:00 2001
From: Felix Hädicke <felixhaedicke@web.de>
Date: Wed, 22 Jun 2016 01:12:09 +0200
Subject: usb: gadget: f_fs: handle control requests in config 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a new FunctionFS descriptor flag named
FUNCTIONFS_CONFIG0_SETUP.

When this flag is enabled, FunctionFS userspace drivers can process
non-standard control requests in configuration 0.

Signed-off-by: Felix Hädicke <felixhaedicke@web.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c  | 5 +++--
 include/uapi/linux/usb/functionfs.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index d8f46f6233ac..998697bd80ac 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2247,7 +2247,8 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 			      FUNCTIONFS_HAS_MS_OS_DESC |
 			      FUNCTIONFS_VIRTUAL_ADDR |
 			      FUNCTIONFS_EVENTFD |
-			      FUNCTIONFS_ALL_CTRL_RECIP)) {
+			      FUNCTIONFS_ALL_CTRL_RECIP |
+			      FUNCTIONFS_CONFIG0_SETUP)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -3142,7 +3143,7 @@ static bool ffs_func_req_match(struct usb_function *f,
 {
 	struct ffs_function *func = ffs_func_from_usb(f);
 
-	if (config0)
+	if (config0 && !(func->ffs->user_flags & FUNCTIONFS_CONFIG0_SETUP))
 		return false;
 
 	switch (creq->bRequestType & USB_RECIP_MASK) {
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 93da4ca82dc7..acc63697a0cc 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -22,6 +22,7 @@ enum functionfs_flags {
 	FUNCTIONFS_VIRTUAL_ADDR = 16,
 	FUNCTIONFS_EVENTFD = 32,
 	FUNCTIONFS_ALL_CTRL_RECIP = 64,
+	FUNCTIONFS_CONFIG0_SETUP = 128,
 };
 
 /* Descriptor of an non-audio endpoint */
-- 
cgit v1.2.3


From 8b2ec318eece89be5e33d5313a25461a55a3177a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Sun, 12 Jun 2016 16:26:40 -0500
Subject: PCI: Add PTM clock granularity information

The PTM Control register (PCIe r3.1, sec 7.32.3) contains an Effective
Granularity field:

  This provides information relating to the expected accuracy of the PTM
  clock, but does not otherwise affect the PTM mechanism.

Set the Effective Granularity based on the PTM Root and any intervening PTM
Time Sources.

This does not set Effective Granularity for Root Complex Integrated
Endpoints because I don't know how to figure out clock granularity for
them.  The spec says:

  ... system software must set [Effective Granularity] to the value
  reported in the Local Clock Granularity field by the associated PTM
  Time Source.

but I don't know how to identify the associated PTM Time Source.  Normally
it's the upstream bridge, but an integrated endpoint has no upstream
bridge.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/ptm.c        | 31 +++++++++++++++++++++++++++++--
 include/linux/pci.h           |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c
index a14ac94b96dc..bab8ac63c4f3 100644
--- a/drivers/pci/pcie/ptm.c
+++ b/drivers/pci/pcie/ptm.c
@@ -19,13 +19,29 @@
 
 static void pci_ptm_info(struct pci_dev *dev)
 {
-	dev_info(&dev->dev, "PTM enabled%s\n", dev->ptm_root ? " (root)" : "");
+	char clock_desc[8];
+
+	switch (dev->ptm_granularity) {
+	case 0:
+		snprintf(clock_desc, sizeof(clock_desc), "unknown");
+		break;
+	case 255:
+		snprintf(clock_desc, sizeof(clock_desc), ">254ns");
+		break;
+	default:
+		snprintf(clock_desc, sizeof(clock_desc), "%udns",
+			 dev->ptm_granularity);
+		break;
+	}
+	dev_info(&dev->dev, "PTM enabled%s, %s granularity\n",
+		 dev->ptm_root ? " (root)" : "", clock_desc);
 }
 
 void pci_ptm_init(struct pci_dev *dev)
 {
 	int pos;
 	u32 cap, ctrl;
+	u8 local_clock;
 	struct pci_dev *ups;
 
 	if (!pci_is_pcie(dev))
@@ -45,6 +61,7 @@ void pci_ptm_init(struct pci_dev *dev)
 		return;
 
 	pci_read_config_dword(dev, pos + PCI_PTM_CAP, &cap);
+	local_clock = (cap & PCI_PTM_GRANULARITY_MASK) >> 8;
 
 	/*
 	 * There's no point in enabling PTM unless it's enabled in the
@@ -55,14 +72,20 @@ void pci_ptm_init(struct pci_dev *dev)
 	ups = pci_upstream_bridge(dev);
 	if (ups && ups->ptm_enabled) {
 		ctrl = PCI_PTM_CTRL_ENABLE;
+		if (ups->ptm_granularity == 0)
+			dev->ptm_granularity = 0;
+		else if (ups->ptm_granularity > local_clock)
+			dev->ptm_granularity = ups->ptm_granularity;
 	} else {
 		if (cap & PCI_PTM_CAP_ROOT) {
 			ctrl = PCI_PTM_CTRL_ENABLE | PCI_PTM_CTRL_ROOT;
 			dev->ptm_root = 1;
+			dev->ptm_granularity = local_clock;
 		} else
 			return;
 	}
 
+	ctrl |= dev->ptm_granularity << 8;
 	pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl);
 	dev->ptm_enabled = 1;
 
@@ -98,18 +121,22 @@ int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
 		ups = pci_upstream_bridge(dev);
 		if (!ups || !ups->ptm_enabled)
 			return -EINVAL;
+
+		dev->ptm_granularity = ups->ptm_granularity;
 	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
+		dev->ptm_granularity = 0;
 	} else
 		return -EINVAL;
 
 	ctrl = PCI_PTM_CTRL_ENABLE;
+	ctrl |= dev->ptm_granularity << 8;
 	pci_write_config_dword(dev, pos + PCI_PTM_CTRL, ctrl);
 	dev->ptm_enabled = 1;
 
 	pci_ptm_info(dev);
 
 	if (granularity)
-		*granularity = 0;
+		*granularity = dev->ptm_granularity;
 	return 0;
 }
 EXPORT_SYMBOL(pci_enable_ptm);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e4b6d6f3c8d..7256f33b6a15 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -371,6 +371,7 @@ struct pci_dev {
 #ifdef CONFIG_PCIE_PTM
 	unsigned int	ptm_root:1;
 	unsigned int	ptm_enabled:1;
+	u8		ptm_granularity;
 #endif
 #ifdef CONFIG_PCI_MSI
 	const struct attribute_group **msi_irq_groups;
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 72bbe1491cbf..d812172d1d7b 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -969,6 +969,7 @@
 #define PCI_PTM_CAP			0x04	    /* PTM Capability */
 #define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
 #define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
+#define  PCI_PTM_GRANULARITY_MASK	0x0000FF00  /* Clock granularity */
 #define PCI_PTM_CTRL			0x08	    /* PTM Control */
 #define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
 #define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
-- 
cgit v1.2.3


From 72145a68e4ee116533df49af4b87aca0aacc179c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Aug 2016 09:01:23 -0700
Subject: tcp: md5: add LINUX_MIB_TCPMD5FAILURE counter

Adds SNMP counter for drops caused by MD5 mismatches.

The current syslog might help, but a counter is more precise and helps
monitoring.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/tcp_ipv4.c       | 1 +
 net/ipv6/tcp_ipv6.c       | 1 +
 4 files changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 25a9ad8bcef1..e7a31f830690 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -235,6 +235,7 @@ enum
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
 	LINUX_MIB_TCPMD5NOTFOUND,		/* TCPMD5NotFound */
 	LINUX_MIB_TCPMD5UNEXPECTED,		/* TCPMD5Unexpected */
+	LINUX_MIB_TCPMD5FAILURE,		/* TCPMD5Failure */
 	LINUX_MIB_SACKSHIFTED,
 	LINUX_MIB_SACKMERGED,
 	LINUX_MIB_SACKSHIFTFALLBACK,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9f665b63a927..1ed015e4bc79 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
 	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
 	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+	SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE),
 	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
 	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 436d978c6c39..ad41e8ecf796 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1169,6 +1169,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
 				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
 				     &iph->saddr, ntohs(th->source),
 				     &iph->daddr, ntohs(th->dest),
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ac0ed7bda406..e4f55683af31 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
 				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
 				     genhash ? "failed" : "mismatch",
 				     &ip6h->saddr, ntohs(th->source),
-- 
cgit v1.2.3


From 4cc6907501ed2393a70ad92a30e00dc54c536e50 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 25 Aug 2016 19:05:19 +0100
Subject: drm/i915: Add I915_PARAM_MMAP_GTT_VERSION to advertise unlimited
 mmaps

Now that we have working partial VMA and faulting support for all
objects, including fence support, advertise to userspace that it can
take advantage of unlimited GGTT mmaps.

v2: Make room in the kerneldoc for a more detailed explanation of the
limitations of the GTT mmap interface.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20160825180519.11341-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.c |  7 ++++++
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 53 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h     |  1 +
 4 files changed, 62 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1d2230f7b749..47fe07283d88 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -355,6 +355,13 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_MIN_EU_IN_POOL:
 		value = INTEL_INFO(dev)->min_eu_in_pool;
 		break;
+	case I915_PARAM_MMAP_GTT_VERSION:
+		/* Though we've started our numbering from 1, and so class all
+		 * earlier versions as 0, in effect their value is undefined as
+		 * the ioctl will report EINVAL for the unknown param!
+		 */
+		value = i915_gem_mmap_gtt_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 04b4fd6c32e4..9fdaf23336df 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3218,6 +3218,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
+int i915_gem_mmap_gtt_version(void);
 
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 04607d4115d6..d37b44126942 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1679,6 +1679,56 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 	return size >> PAGE_SHIFT;
 }
 
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. It can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though no
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 1;
+}
+
 /**
  * i915_gem_fault - fault a page into the GTT
  * @area: CPU VMA in question
@@ -1694,6 +1744,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  * from the GTT and/or fence registers to make room.  So performance may
  * suffer if the GTT working set is large or there are few fence registers
  * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
  */
 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5501fe83ed92..03725fe89859 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -387,6 +387,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_SOFTPIN	 37
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
+#define I915_PARAM_MMAP_GTT_VERSION	 40
 
 typedef struct drm_i915_getparam {
 	__s32 param;
-- 
cgit v1.2.3


From ef20cd4dd1633987bcf46ac34ace2c8af212361f Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Fri, 26 Aug 2016 10:52:53 +0200
Subject: tipc: introduce UDP replicast

This patch introduces UDP replicast. A concept where we emulate
multicast by sending multiple unicast messages to configured peers.

The purpose of replicast is mainly to be able to use TIPC in cloud
environments where IP multicast is disabled. Using replicas to unicast
multicast messages is costly as we have to copy each skb and send the
copies individually.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |   1 +
 net/tipc/bearer.c                 |  44 ++++++++++++++
 net/tipc/bearer.h                 |   1 +
 net/tipc/netlink.c                |   5 ++
 net/tipc/udp_media.c              | 118 ++++++++++++++++++++++++++++++++++----
 net/tipc/udp_media.h              |  44 ++++++++++++++
 6 files changed, 201 insertions(+), 12 deletions(-)
 create mode 100644 net/tipc/udp_media.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index bcb65ef725f6..b15664c36219 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -60,6 +60,7 @@ enum {
 	TIPC_NL_MON_GET,
 	TIPC_NL_MON_PEER_GET,
 	TIPC_NL_PEER_REMOVE,
+	TIPC_NL_BEARER_ADD,
 
 	__TIPC_NL_CMD_MAX,
 	TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 28056fa8f77a..d7b442dd6669 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -42,6 +42,7 @@
 #include "monitor.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "udp_media.h"
 
 #define MAX_ADDR_STR 60
 
@@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int err;
+	char *name;
+	struct tipc_bearer *b;
+	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+
+	if (!info->attrs[TIPC_NLA_BEARER])
+		return -EINVAL;
+
+	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
+			       info->attrs[TIPC_NLA_BEARER],
+			       tipc_nl_bearer_policy);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_BEARER_NAME])
+		return -EINVAL;
+	name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+	rtnl_lock();
+	b = tipc_bearer_find(net, name);
+	if (!b) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+		err = tipc_udp_nl_bearer_add(b,
+					     attrs[TIPC_NLA_BEARER_UDP_OPTS]);
+		if (err) {
+			rtnl_unlock();
+			return err;
+		}
+	}
+#endif
+	rtnl_unlock();
+
+	return 0;
+}
+
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 {
 	int err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 83a9abbfe32c..78892e2f53e3 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 2718de667828..3122f21ca979 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -160,6 +160,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 		.dumpit	= tipc_nl_bearer_dump,
 		.policy = tipc_nl_policy,
 	},
+	{
+		.cmd	= TIPC_NL_BEARER_ADD,
+		.doit	= tipc_nl_bearer_add,
+		.policy = tipc_nl_policy,
+	},
 	{
 		.cmd	= TIPC_NL_BEARER_SET,
 		.doit	= tipc_nl_bearer_set,
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index b8ec1a18241e..6b938cc15daf 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -49,6 +49,7 @@
 #include "core.h"
 #include "bearer.h"
 #include "netlink.h"
+#include "msg.h"
 
 /* IANA assigned UDP port */
 #define UDP_PORT_DEFAULT	6118
@@ -70,6 +71,13 @@ struct udp_media_addr {
 	};
 };
 
+/* struct udp_replicast - container for UDP remote addresses */
+struct udp_replicast {
+	struct udp_media_addr addr;
+	struct rcu_head rcu;
+	struct list_head list;
+};
+
 /**
  * struct udp_bearer - ip/udp bearer data structure
  * @bearer:	associated generic tipc bearer
@@ -82,6 +90,7 @@ struct udp_bearer {
 	struct socket *ubsock;
 	u32 ifindex;
 	struct work_struct work;
+	struct udp_replicast rcast;
 };
 
 static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
@@ -203,29 +212,75 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 {
 	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
 	struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value;
+	struct udp_replicast *rcast;
 	struct udp_bearer *ub;
 	int err = 0;
 
 	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
 		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
 		if (err)
-			goto tx_error;
+			goto out;
 	}
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
 	ub = rcu_dereference_rtnl(b->media_ptr);
 	if (!ub) {
 		err = -ENODEV;
-		goto tx_error;
+		goto out;
 	}
 
-	return tipc_udp_xmit(net, skb, ub, src, dst);
+	if (!addr->broadcast || list_empty(&ub->rcast.list))
+		return tipc_udp_xmit(net, skb, ub, src, dst);
 
-tx_error:
+	/* Replicast, send an skb to each configured IP address */
+	list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
+		struct sk_buff *_skb;
+
+		_skb = pskb_copy(skb, GFP_ATOMIC);
+		if (!_skb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
+		if (err) {
+			kfree_skb(_skb);
+			goto out;
+		}
+	}
+	err = 0;
+out:
 	kfree_skb(skb);
 	return err;
 }
 
+static int tipc_udp_rcast_add(struct tipc_bearer *b,
+			      struct udp_media_addr *addr)
+{
+	struct udp_replicast *rcast;
+	struct udp_bearer *ub;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub)
+		return -ENODEV;
+
+	rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
+	if (!rcast)
+		return -ENOMEM;
+
+	memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
+
+	if (ntohs(addr->proto) == ETH_P_IP)
+		pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (ntohs(addr->proto) == ETH_P_IPV6)
+		pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
+#endif
+
+	list_add_rcu(&rcast->list, &ub->rcast.list);
+	return 0;
+}
+
 /* tipc_udp_recv - read data from bearer socket */
 static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -320,6 +375,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr,
 	return -EADDRNOTAVAIL;
 }
 
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
+{
+	int err;
+	struct udp_media_addr addr = {0};
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	struct udp_media_addr *dst;
+
+	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+		return -EINVAL;
+
+	if (!opts[TIPC_NLA_UDP_REMOTE])
+		return -EINVAL;
+
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
+	if (err)
+		return err;
+
+	dst = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (tipc_udp_is_mcast_addr(dst)) {
+		pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
+		return -EINVAL;
+	}
+
+	return tipc_udp_rcast_add(b, &addr);
+}
+
 /**
  * tipc_udp_enable - callback to create a new udp bearer instance
  * @net:	network namespace
@@ -334,7 +415,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 {
 	int err = -EINVAL;
 	struct udp_bearer *ub;
-	struct udp_media_addr *remote;
+	struct udp_media_addr remote = {0};
 	struct udp_media_addr local = {0};
 	struct udp_port_cfg udp_conf = {0};
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
@@ -344,6 +425,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	if (!ub)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&ub->rcast.list);
+
 	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
 		goto err;
 
@@ -362,9 +445,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	if (err)
 		goto err;
 
-	remote = (struct udp_media_addr *)&b->bcast_addr.value;
-	memset(remote, 0, sizeof(struct udp_media_addr));
-	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL);
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
 	if (err)
 		goto err;
 
@@ -409,10 +490,17 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	tuncfg.encap_destroy = NULL;
 	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
 
-	if (tipc_udp_is_mcast_addr(remote)) {
-		if (enable_mcast(ub, remote))
-			goto err;
-	}
+	/**
+	 * The bcast media address port is used for all peers and the ip
+	 * is used if it's a multicast address.
+	 */
+	memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
+	if (tipc_udp_is_mcast_addr(&remote))
+		err = enable_mcast(ub, &remote);
+	else
+		err = tipc_udp_rcast_add(b, &remote);
+	if (err)
+		goto err;
 
 	return 0;
 err:
@@ -424,6 +512,12 @@ err:
 static void cleanup_bearer(struct work_struct *work)
 {
 	struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+	struct udp_replicast *rcast, *tmp;
+
+	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		list_del_rcu(&rcast->list);
+		kfree_rcu(rcast, rcu);
+	}
 
 	if (ub->ubsock)
 		udp_tunnel_sock_release(ub->ubsock);
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
new file mode 100644
index 000000000000..4dcb54880aa6
--- /dev/null
+++ b/net/tipc/udp_media.h
@@ -0,0 +1,44 @@
+/*
+ * net/tipc/udp_media.h: Include file for UDP bearer media
+ *
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+#ifndef _TIPC_UDP_MEDIA_H
+#define _TIPC_UDP_MEDIA_H
+
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+
+#endif
+#endif
-- 
cgit v1.2.3


From fdb3accc2c15fabc2b687b2819da9167027c61b6 Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Fri, 26 Aug 2016 10:52:55 +0200
Subject: tipc: add the ability to get UDP options via netlink

Add UDP bearer options to netlink bearer get message. This is used by
the tipc user space tool to display UDP options.

The UDP bearer information is passed using either a sockaddr_in or
sockaddr_in6 structs. This means the user space receiver should
intermediately store the retrieved data in a large enough struct
(sockaddr_strage) before casting to the proper IP version type.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |  2 ++
 net/tipc/bearer.c                 |  8 +++++
 net/tipc/udp_media.c              | 61 +++++++++++++++++++++++++++++++++++++++
 net/tipc/udp_media.h              |  1 +
 4 files changed, 72 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index b15664c36219..f9edd20fe9ba 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -61,6 +61,7 @@ enum {
 	TIPC_NL_MON_PEER_GET,
 	TIPC_NL_PEER_REMOVE,
 	TIPC_NL_BEARER_ADD,
+	TIPC_NL_UDP_GET_REMOTEIP,
 
 	__TIPC_NL_CMD_MAX,
 	TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
@@ -100,6 +101,7 @@ enum {
 	TIPC_NLA_UDP_UNSPEC,
 	TIPC_NLA_UDP_LOCAL,		/* sockaddr_storage */
 	TIPC_NLA_UDP_REMOTE,		/* sockaddr_storage */
+	TIPC_NLA_UDP_MULTI_REMOTEIP,	/* flag */
 
 	__TIPC_NLA_UDP_MAX,
 	TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d7b442dd6669..975dbeb60ab0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -712,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
 		goto prop_msg_full;
 
 	nla_nest_end(msg->skb, prop);
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) {
+		if (tipc_udp_nl_add_bearer_data(msg, bearer))
+			goto attr_msg_full;
+	}
+#endif
+
 	nla_nest_end(msg->skb, attrs);
 	genlmsg_end(msg->skb, hdr);
 
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 6ece3c9ccf82..a6cdd9895653 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -401,6 +401,67 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
 	return err;
 }
 
+static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
+				  struct udp_media_addr *addr, int nla_t)
+{
+	if (ntohs(addr->proto) == ETH_P_IP) {
+		struct sockaddr_in ip4;
+
+		ip4.sin_family = AF_INET;
+		ip4.sin_port = addr->port;
+		ip4.sin_addr.s_addr = addr->ipv4.s_addr;
+		if (nla_put(skb, nla_t, sizeof(ip4), &ip4))
+			return -EMSGSIZE;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (ntohs(addr->proto) == ETH_P_IPV6) {
+		struct sockaddr_in6 ip6;
+
+		ip6.sin6_family = AF_INET6;
+		ip6.sin6_port  = addr->port;
+		memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
+		if (nla_put(skb, nla_t, sizeof(ip6), &ip6))
+			return -EMSGSIZE;
+#endif
+	}
+
+	return 0;
+}
+
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b)
+{
+	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+	struct udp_media_addr *dst;
+	struct udp_bearer *ub;
+	struct nlattr *nest;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub)
+		return -ENODEV;
+
+	nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+	if (!nest)
+		goto msg_full;
+
+	if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL))
+		goto msg_full;
+
+	dst = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE))
+		goto msg_full;
+
+	if (!list_empty(&ub->rcast.list)) {
+		if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP))
+			goto msg_full;
+	}
+
+	nla_nest_end(msg->skb, nest);
+	return 0;
+msg_full:
+	nla_nest_cancel(msg->skb, nest);
+	return -EMSGSIZE;
+}
+
 /**
  * tipc_parse_udp_addr - build udp media address from netlink data
  * @nlattr:	netlink attribute containing sockaddr storage aligned address
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
index 4dcb54880aa6..c06326a134db 100644
--- a/net/tipc/udp_media.h
+++ b/net/tipc/udp_media.h
@@ -39,6 +39,7 @@
 #define _TIPC_UDP_MEDIA_H
 
 int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
 
 #endif
 #endif
-- 
cgit v1.2.3


From 5711a98221443aec54c4c81ee98c6ae46acccb65 Mon Sep 17 00:00:00 2001
From: Vidya Sagar Ravipati <vidya@cumulusnetworks.com>
Date: Fri, 26 Aug 2016 01:25:50 -0700
Subject: net: ethtool: add support for 1000BaseX and missing 10G link modes

This patch enhances ethtool link mode bitmap to include
missing interface modes for 1G/10G speeds

Changes:
1000baseX is the mode introduced to cover all 1G Fiber cases.
All modes under 1000BaseX i.e. 1000BASE-SX, 1000BASE-LX, 1000BASE-LX10
and 1000BASE-BX10 are not explicitly defined at this moment.
10G CR,SR,LR and ER link modes are included for 10G speed..

Issue:
ethtool on  1G/10G SFP port reports Base-T
as this port supports 1000baseX,10G CR, SR and LR modes.

root@tor-02$ ethtool swp1
Settings for swp1:
        Supported ports: [ FIBRE ]
        Supported link modes:   1000baseT/Full
                                10000baseT/Full
        Supported pause frame use: Symmetric Receive-only
        Supports auto-negotiation: Yes
        Advertised link modes:  1000baseT/Full
        Advertised pause frame use: No
        Advertised auto-negotiation: No
        Speed: 10000Mb/s
        Duplex: Full
        Port: FIBRE
        PHYAD: 0
        Transceiver: external
        Auto-negotiation: off
        Current message level: 0x00000000 (0)

        Link detected: yes

After fix:
root@tor-02$ ethtool swp1
Settings for swp1:
        Supported ports: [ FIBRE ]
        Supported link modes:   1000baseX/Full
                                10000baseCR/Full
                                10000baseSR/Full
                                10000baseLR/Full
                                10000baseER/Full
        Supported pause frame use: Symmetric Receive-only
        Supports auto-negotiation: Yes
        Advertised link modes:  1000baseT/Full
        Advertised pause frame use: No
        Advertised auto-negotiation: No
        Speed: 10000Mb/s
        Duplex: Full
        Port: FIBRE
        PHYAD: 0
        Transceiver: external
        Auto-negotiation: off
        Current message level: 0x00000000 (0)
        Link detected: yes

Signed-off-by: Vidya Sagar Ravipati <vidya@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b8f38e84d93a..099a4200732c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1362,7 +1362,14 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT	= 37,
 	ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT	= 38,
 	ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT	= 39,
-	ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT         = 40,
+	ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT		= 40,
+	ETHTOOL_LINK_MODE_1000baseX_Full_BIT	= 41,
+	ETHTOOL_LINK_MODE_10000baseCR_Full_BIT	= 42,
+	ETHTOOL_LINK_MODE_10000baseSR_Full_BIT	= 43,
+	ETHTOOL_LINK_MODE_10000baseLR_Full_BIT	= 44,
+	ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT	= 45,
+	ETHTOOL_LINK_MODE_10000baseER_Full_BIT	= 46,
+
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
 	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1371,7 +1378,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+	  = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
-- 
cgit v1.2.3


From 295d0dafd31c9a26f3d34a9bdc75f226e18fd9a2 Mon Sep 17 00:00:00 2001
From: Ken Wang <Qingqing.Wang@amd.com>
Date: Tue, 24 May 2016 21:02:53 +0800
Subject: drm/amdgpu: Add SI Family information

Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 include/uapi/drm/amdgpu_drm.h              | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aef9a9ac6280..f95bcb80dde3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1245,6 +1245,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 	case CHIP_PITCAIRN:
 	case CHIP_OLAND:
 	case CHIP_HAINAN:
+		adev->family = AMDGPU_FAMILY_SI;
 		r = si_set_ip_blocks(adev);
 		if (r)
 			return r;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ae2845fdcb5f..d6b5a21f3d3c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -649,6 +649,7 @@ struct drm_amdgpu_info_hw_ip {
  * Supported GPU families
  */
 #define AMDGPU_FAMILY_UNKNOWN			0
+#define AMDGPU_FAMILY_SI			110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */
 #define AMDGPU_FAMILY_CI			120 /* Bonaire, Hawaii */
 #define AMDGPU_FAMILY_KV			125 /* Kaveri, Kabini, Mullins */
 #define AMDGPU_FAMILY_VI			130 /* Iceland, Tonga */
-- 
cgit v1.2.3


From b6cb5ac8331b6bcfe9ce38c7f7f58db6e1d6270a Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 31 Aug 2016 15:36:52 +0200
Subject: net: bridge: add per-port multicast flood flag

Add a per-port flag to control the unknown multicast flood, similar to the
unknown unicast flood flag and break a few long lines in the netlink flag
exports.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h    |  1 +
 include/uapi/linux/if_link.h |  1 +
 net/bridge/br_forward.c      |  3 +++
 net/bridge/br_if.c           |  2 +-
 net/bridge/br_netlink.c      | 12 +++++++++---
 net/bridge/br_sysfs_if.c     |  1 +
 6 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index dcb89e3515db..c6587c01d951 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -45,6 +45,7 @@ struct br_ip_list {
 #define BR_PROXYARP		BIT(8)
 #define BR_LEARNING_SYNC	BIT(9)
 #define BR_PROXYARP_WIFI	BIT(10)
+#define BR_MCAST_FLOOD		BIT(11)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index a1b5202c5f6b..9bf3aecfe05b 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -318,6 +318,7 @@ enum {
 	IFLA_BRPORT_FLUSH,
 	IFLA_BRPORT_MULTICAST_ROUTER,
 	IFLA_BRPORT_PAD,
+	IFLA_BRPORT_MCAST_FLOOD,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 5de854ed3340..7cb41aee4c82 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -186,6 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
 		/* Do not flood unicast traffic to ports that turn it off */
 		if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
 			continue;
+		if (pkt_type == BR_PKT_MULTICAST &&
+		    !(p->flags & BR_MCAST_FLOOD))
+			continue;
 
 		/* Do not flood to ports that enable proxy ARP */
 		if (p->flags & BR_PROXYARP)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 1da3221845f1..ed0dd3340084 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -362,7 +362,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	p->path_cost = port_cost(dev);
 	p->priority = 0x8000 >> BR_PORT_BITS;
 	p->port_no = index;
-	p->flags = BR_LEARNING | BR_FLOOD;
+	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD;
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 190a5bc00f4a..e99037c6f7b7 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -169,10 +169,15 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
 	    nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
 	    nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_PROTECT,
+		       !!(p->flags & BR_ROOT_BLOCK)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE,
+		       !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD,
+		       !!(p->flags & BR_FLOOD)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD,
+		       !!(p->flags & BR_MCAST_FLOOD)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
 		       !!(p->flags & BR_PROXYARP_WIFI)) ||
@@ -630,6 +635,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
 	br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
 	br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+	br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 1e04d4d44273..e657258e1f2c 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(learning, BR_LEARNING);
 BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
 BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
 BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
+BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
-- 
cgit v1.2.3


From 0515e5999a466dfe6e1924f460da599bb6821487 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 1 Sep 2016 18:37:22 -0700
Subject: bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type

Introduce BPF_PROG_TYPE_PERF_EVENT programs that can be attached to
HW and SW perf events (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE
correspondingly in uapi/linux/perf_event.h)

The program visible context meta structure is
struct bpf_perf_event_data {
    struct pt_regs regs;
     __u64 sample_period;
};
which is accessible directly from the program:
int bpf_prog(struct bpf_perf_event_data *ctx)
{
  ... ctx->sample_period ...
  ... ctx->regs.ip ...
}

The bpf verifier rewrites the accesses into kernel internal
struct bpf_perf_event_data_kern which allows changing
struct perf_sample_data without affecting bpf programs.
New fields can be added to the end of struct bpf_perf_event_data
in the future.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/perf_event.h          |  5 +++
 include/uapi/linux/Kbuild           |  1 +
 include/uapi/linux/bpf.h            |  1 +
 include/uapi/linux/bpf_perf_event.h | 18 +++++++++++
 kernel/trace/bpf_trace.c            | 61 +++++++++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+)
 create mode 100644 include/uapi/linux/bpf_perf_event.h

(limited to 'include/uapi')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2b6b43cc0dd5..97bfe62f30d7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -788,6 +788,11 @@ struct perf_output_handle {
 	int				page;
 };
 
+struct bpf_perf_event_data_kern {
+	struct pt_regs *regs;
+	struct perf_sample_data *data;
+};
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..d0352a971ebd 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -71,6 +71,7 @@ header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf_common.h
+header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e4c5a1baa993..f896dfac4ac0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
+	BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000000..067427259820
--- /dev/null
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+	struct pt_regs regs;
+	__u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ad35213b8405..d3869b03d9fe 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -552,10 +554,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
 	.type	= BPF_PROG_TYPE_TRACEPOINT,
 };
 
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    enum bpf_reg_type *reg_type)
+{
+	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
+		if (size != sizeof(u64))
+			return false;
+	} else {
+		if (size != sizeof(long))
+			return false;
+	}
+	return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+				      int src_reg, int ctx_off,
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct bpf_perf_event_data, sample_period):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)),
+				      dst_reg, src_reg,
+				      offsetof(struct bpf_perf_event_data_kern, data));
+		*insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+				      offsetof(struct perf_sample_data, period));
+		break;
+	default:
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)),
+				      dst_reg, src_reg,
+				      offsetof(struct bpf_perf_event_data_kern, regs));
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)),
+				      dst_reg, dst_reg, ctx_off);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {
+	.get_func_proto		= tp_prog_func_proto,
+	.is_valid_access	= pe_prog_is_valid_access,
+	.convert_ctx_access	= pe_prog_convert_ctx_access,
+};
+
+static struct bpf_prog_type_list perf_event_tl = {
+	.ops	= &perf_event_prog_ops,
+	.type	= BPF_PROG_TYPE_PERF_EVENT,
+};
+
 static int __init register_kprobe_prog_ops(void)
 {
 	bpf_register_prog_type(&kprobe_tl);
 	bpf_register_prog_type(&tracepoint_tl);
+	bpf_register_prog_type(&perf_event_tl);
 	return 0;
 }
 late_initcall(register_kprobe_prog_ops);
-- 
cgit v1.2.3


From ecc6569f3503b39f45bc6b86197b5e0a8533fb72 Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Thu, 25 Aug 2016 23:08:11 +0800
Subject: netfilter: gre: Use consistent GRE_* macros instead of ones defined
 by netfilter.

There are already some GRE_* macros in kernel, so it is unnecessary
to define these macros. And remove some useless macros

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 22 ++--------------------
 include/uapi/linux/if_tunnel.h                   |  1 +
 net/ipv4/netfilter/nf_nat_proto_gre.c            |  4 ++--
 net/netfilter/nf_conntrack_proto_gre.c           |  4 ++--
 4 files changed, 7 insertions(+), 24 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2b5524..0189747f2691 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -1,29 +1,11 @@
 #ifndef _CONNTRACK_PROTO_GRE_H
 #define _CONNTRACK_PROTO_GRE_H
 #include <asm/byteorder.h>
+#include <net/gre.h>
+#include <net/pptp.h>
 
 /* GRE PROTOCOL HEADER */
 
-/* GRE Version field */
-#define GRE_VERSION_1701	0x0
-#define GRE_VERSION_PPTP	0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP	0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C		0x80
-#define GRE_FLAG_R		0x40
-#define GRE_FLAG_K		0x20
-#define GRE_FLAG_S		0x10
-#define GRE_FLAG_A		0x80
-
-#define GRE_IS_C(f)	((f)&GRE_FLAG_C)
-#define GRE_IS_R(f)	((f)&GRE_FLAG_R)
-#define GRE_IS_K(f)	((f)&GRE_FLAG_K)
-#define GRE_IS_S(f)	((f)&GRE_FLAG_S)
-#define GRE_IS_A(f)	((f)&GRE_FLAG_A)
-
 /* GRE is a mess: Four different standards */
 struct gre_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 9865c8caedde..fb7337d6b985 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -39,6 +39,7 @@
 #define GRE_IS_REC(f)		((f) & GRE_REC)
 #define GRE_IS_ACK(f)		((f) & GRE_ACK)
 
+#define GRE_VERSION_0		__cpu_to_be16(0x0000)
 #define GRE_VERSION_1		__cpu_to_be16(0x0001)
 #define GRE_PROTO_PPP		__cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK	__cpu_to_be32(0xffff)
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923f1e15..93198d71dbb6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -104,11 +104,11 @@ gre_manip_pkt(struct sk_buff *skb,
 	if (maniptype != NF_NAT_MANIP_DST)
 		return true;
 	switch (greh->version) {
-	case GRE_VERSION_1701:
+	case ntohs(GRE_VERSION_0):
 		/* We do not currently NAT any GREv0 packets.
 		 * Try to behave like "nf_nat_proto_unknown" */
 		break;
-	case GRE_VERSION_PPTP:
+	case ntohs(GRE_VERSION_1):
 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
 		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a96451a7af20..deb239a014e4 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -200,7 +200,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 
 	/* first only delinearize old RFC1701 GRE header */
 	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+	if (!grehdr || grehdr->version != ntohs(GRE_VERSION_1)) {
 		/* try to behave like "nf_conntrack_proto_generic" */
 		tuple->src.u.all = 0;
 		tuple->dst.u.all = 0;
@@ -212,7 +212,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	if (!pgrehdr)
 		return true;
 
-	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+	if (grehdr->protocol != GRE_PROTO_PPP) {
 		pr_debug("GRE_VERSION_PPTP but unknown proto\n");
 		return false;
 	}
-- 
cgit v1.2.3


From 0d9932b2875f568d679f2af33ce610da3903ac11 Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Fri, 2 Sep 2016 15:05:57 +0200
Subject: netfilter: nft_numgen: rename until attribute by modulus

The _until_ attribute is renamed to _modulus_ as the behaviour is similar to
other expresions with number limits (ex. nft_hash).

Renaming is possible because there isn't a kernel release yet with these
changes.

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 ++--
 net/netfilter/nft_numgen.c               | 30 +++++++++++++++---------------
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 28ce01d79707..24161e25576d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1126,13 +1126,13 @@ enum nft_trace_types {
  * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
  *
  * @NFTA_NG_DREG: destination register (NLA_U32)
- * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
  * @NFTA_NG_TYPE: operation type (NLA_U32)
  */
 enum nft_ng_attributes {
 	NFTA_NG_UNSPEC,
 	NFTA_NG_DREG,
-	NFTA_NG_UNTIL,
+	NFTA_NG_MODULUS,
 	NFTA_NG_TYPE,
 	__NFTA_NG_MAX
 };
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 294745ecb0fc..f51a3ede3932 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
 
 struct nft_ng_inc {
 	enum nft_registers      dreg:8;
-	u32			until;
+	u32			modulus;
 	atomic_t		counter;
 };
 
@@ -34,7 +34,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
 
 	do {
 		oval = atomic_read(&priv->counter);
-		nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+		nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
 	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
 
 	memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32));
@@ -42,7 +42,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
 
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
 	[NFTA_NG_DREG]		= { .type = NLA_U32 },
-	[NFTA_NG_UNTIL]		= { .type = NLA_U32 },
+	[NFTA_NG_MODULUS]	= { .type = NLA_U32 },
 	[NFTA_NG_TYPE]		= { .type = NLA_U32 },
 };
 
@@ -52,8 +52,8 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 {
 	struct nft_ng_inc *priv = nft_expr_priv(expr);
 
-	priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
-	if (priv->until == 0)
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+	if (priv->modulus == 0)
 		return -ERANGE;
 
 	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
@@ -64,11 +64,11 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 }
 
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
-		       u32 until, enum nft_ng_types type)
+		       u32 modulus, enum nft_ng_types type)
 {
 	if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
 		goto nla_put_failure;
-	if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+	if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus)))
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
 		goto nla_put_failure;
@@ -83,12 +83,12 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_ng_inc *priv = nft_expr_priv(expr);
 
-	return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL);
 }
 
 struct nft_ng_random {
 	enum nft_registers      dreg:8;
-	u32			until;
+	u32			modulus;
 };
 
 static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -99,7 +99,7 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
 	struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
 
 	regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
-						  priv->until);
+						  priv->modulus);
 }
 
 static int nft_ng_random_init(const struct nft_ctx *ctx,
@@ -108,8 +108,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
 {
 	struct nft_ng_random *priv = nft_expr_priv(expr);
 
-	priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
-	if (priv->until == 0)
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+	if (priv->modulus == 0)
 		return -ERANGE;
 
 	prandom_init_once(&nft_numgen_prandom_state);
@@ -124,7 +124,7 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_ng_random *priv = nft_expr_priv(expr);
 
-	return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM);
 }
 
 static struct nft_expr_type nft_ng_type;
@@ -149,8 +149,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 {
 	u32 type;
 
-	if (!tb[NFTA_NG_DREG]	||
-	    !tb[NFTA_NG_UNTIL]	||
+	if (!tb[NFTA_NG_DREG]	 ||
+	    !tb[NFTA_NG_MODULUS] ||
 	    !tb[NFTA_NG_TYPE])
 		return ERR_PTR(-EINVAL);
 
-- 
cgit v1.2.3


From d545caca827b65aab557a9e9dcdcf1e5a3823c2d Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Thu, 8 Sep 2016 00:42:25 +0900
Subject: net: inet: diag: expose the socket mark to privileged processes.

This adds the capability for a process that has CAP_NET_ADMIN on
a socket to see the socket mark in socket dumps.

Commit a52e95abf772 ("net: diag: allow socket bytecode filters to
match socket marks") recently gave privileged processes the
ability to filter socket dumps based on mark. This patch is
complementary: it ensures that the mark is also passed to
userspace in the socket's netlink attributes.  It is useful for
tools like ss which display information about sockets.

Tested: https://android-review.googlesource.com/270210
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h      |  4 ++--
 include/uapi/linux/inet_diag.h |  1 +
 net/ipv4/inet_diag.c           | 49 ++++++++++++++++++++++++++++--------------
 net/ipv4/udp_diag.c            | 10 +++++----
 net/sctp/sctp_diag.c           | 20 +++++++++++------
 5 files changed, 56 insertions(+), 28 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index feb04ea20f11..65da430e260f 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
 		      u32 pid, u32 seq, u16 nlmsg_flags,
-		      const struct nlmsghdr *unlh);
+		      const struct nlmsghdr *unlh, bool net_admin);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
 			 struct netlink_callback *cb,
 			 const struct inet_diag_req_v2 *r,
@@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     struct inet_diag_msg *r, int ext,
-			     struct user_namespace *user_ns);
+			     struct user_namespace *user_ns, bool net_admin);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 5581206a08ae..b5c366f87b3e 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -123,6 +123,7 @@ enum {
 	INET_DIAG_LOCALS,
 	INET_DIAG_PEERS,
 	INET_DIAG_PAD,
+	INET_DIAG_MARK,
 	__INET_DIAG_MAX,
 };
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index abfbe492ebfe..e4d16fc5bbb3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -99,6 +99,7 @@ static size_t inet_sk_attr_size(void)
 		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(4) /* INET_DIAG_MARK */
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
 		+ nla_total_size(sizeof(struct inet_diag_msg))
 		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -109,7 +110,8 @@ static size_t inet_sk_attr_size(void)
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     struct inet_diag_msg *r, int ext,
-			     struct user_namespace *user_ns)
+			     struct user_namespace *user_ns,
+			     bool net_admin)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -136,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 	}
 #endif
 
+	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+		goto errout;
+
 	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 	r->idiag_inode = sock_i_ino(sk);
 
@@ -149,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
 		      u32 portid, u32 seq, u16 nlmsg_flags,
-		      const struct nlmsghdr *unlh)
+		      const struct nlmsghdr *unlh,
+		      bool net_admin)
 {
 	const struct tcp_congestion_ops *ca_ops;
 	const struct inet_diag_handler *handler;
@@ -175,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	r->idiag_timer = 0;
 	r->idiag_retrans = 0;
 
-	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
 		goto errout;
 
 	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -274,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk,
 			      const struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,
 			      u32 portid, u32 seq, u16 nlmsg_flags,
-			      const struct nlmsghdr *unlh)
+			      const struct nlmsghdr *unlh,
+			      bool net_admin)
 {
-	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
-				 user_ns, portid, seq, nlmsg_flags, unlh);
+	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
+				 portid, seq, nlmsg_flags, unlh, net_admin);
 }
 
 static int inet_twsk_diag_fill(struct sock *sk,
@@ -319,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk,
 
 static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 			      u32 portid, u32 seq, u16 nlmsg_flags,
-			      const struct nlmsghdr *unlh)
+			      const struct nlmsghdr *unlh, bool net_admin)
 {
+	struct request_sock *reqsk = inet_reqsk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	long tmo;
@@ -334,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 	inet_diag_msg_common_fill(r, sk);
 	r->idiag_state = TCP_SYN_RECV;
 	r->idiag_timer = 1;
-	r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+	r->idiag_retrans = reqsk->num_retrans;
 
 	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
 		     offsetof(struct sock, sk_cookie));
@@ -346,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 	r->idiag_uid	= 0;
 	r->idiag_inode	= 0;
 
+	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+				     inet_rsk(reqsk)->ir_mark))
+		return -EMSGSIZE;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 }
@@ -354,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			const struct inet_diag_req_v2 *r,
 			struct user_namespace *user_ns,
 			u32 portid, u32 seq, u16 nlmsg_flags,
-			const struct nlmsghdr *unlh)
+			const struct nlmsghdr *unlh, bool net_admin)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
 		return inet_twsk_diag_fill(sk, skb, portid, seq,
@@ -362,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV)
 		return inet_req_diag_fill(sk, skb, portid, seq,
-					  nlmsg_flags, unlh);
+					  nlmsg_flags, unlh, net_admin);
 
 	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
-				  nlmsg_flags, unlh);
+				  nlmsg_flags, unlh, net_admin);
 }
 
 struct sock *inet_diag_find_one_icsk(struct net *net,
@@ -435,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 	err = sk_diag_fill(sk, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).sk),
 			   NETLINK_CB(in_skb).portid,
-			   nlh->nlmsg_seq, 0, nlh);
+			   nlh->nlmsg_seq, 0, nlh,
+			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		nlmsg_free(rep);
@@ -796,7 +809,8 @@ static int inet_csk_diag_dump(struct sock *sk,
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
 			      const struct inet_diag_req_v2 *r,
-			      const struct nlattr *bc)
+			      const struct nlattr *bc,
+			      bool net_admin)
 {
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
@@ -804,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk,
 	return inet_csk_diag_fill(sk, skb, r,
 				  sk_user_ns(NETLINK_CB(cb->skb).sk),
 				  NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
+				  net_admin);
 }
 
 static void twsk_build_assert(void)
@@ -840,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 	struct net *net = sock_net(skb->sk);
 	int i, num, s_i, s_num;
 	u32 idiag_states = r->idiag_states;
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 
 	if (idiag_states & TCPF_SYN_RECV)
 		idiag_states |= TCPF_NEW_SYN_RECV;
@@ -880,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 				    cb->args[3] > 0)
 					goto next_listen;
 
-				if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
+				if (inet_csk_diag_dump(sk, skb, cb, r,
+						       bc, net_admin) < 0) {
 					spin_unlock_bh(&ilb->lock);
 					goto done;
 				}
@@ -948,7 +965,7 @@ skip_listen_ht:
 					   sk_user_ns(NETLINK_CB(cb->skb).sk),
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   cb->nlh);
+					   cb->nlh, net_admin);
 			if (res < 0) {
 				spin_unlock_bh(lock);
 				goto done;
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 58b79c0c0d69..9a89c10a55f0 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -20,7 +20,7 @@
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 			struct netlink_callback *cb,
 			const struct inet_diag_req_v2 *req,
-			struct nlattr *bc)
+			struct nlattr *bc, bool net_admin)
 {
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
@@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 	return inet_sk_diag_fill(sk, NULL, skb, req,
 			sk_user_ns(NETLINK_CB(cb->skb).sk),
 			NETLINK_CB(cb->skb).portid,
-			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin);
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
@@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 	err = inet_sk_diag_fill(sk, NULL, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).sk),
 			   NETLINK_CB(in_skb).portid,
-			   nlh->nlmsg_seq, 0, nlh);
+			   nlh->nlmsg_seq, 0, nlh,
+			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(rep);
@@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
 		     struct netlink_callback *cb,
 		     const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 	struct net *net = sock_net(skb->sk);
 	int num, s_num, slot, s_slot;
 
@@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
 			    r->id.idiag_dport)
 				goto next;
 
-			if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
 				spin_unlock_bh(&hslot->lock);
 				goto done;
 			}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f3508aa75815..807158e32f5f 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
 			       const struct inet_diag_req_v2 *req,
 			       struct user_namespace *user_ns,
 			       int portid, u32 seq, u16 nlmsg_flags,
-			       const struct nlmsghdr *unlh)
+			       const struct nlmsghdr *unlh,
+			       bool net_admin)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct list_head *addr_list;
@@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
 		r->idiag_retrans = 0;
 	}
 
-	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
 		goto errout;
 
 	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
@@ -203,6 +204,7 @@ struct sctp_comm_param {
 	struct netlink_callback *cb;
 	const struct inet_diag_req_v2 *r;
 	const struct nlmsghdr *nlh;
+	bool net_admin;
 };
 
 static size_t inet_assoc_attr_size(struct sctp_association *asoc)
@@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
 		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(4) /* INET_DIAG_MARK */
 		+ nla_total_size(addrlen * asoc->peer.transport_count)
 		+ nla_total_size(addrlen * addrcnt)
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
@@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
 	err = inet_sctp_diag_fill(sk, assoc, rep, req,
 				  sk_user_ns(NETLINK_CB(in_skb).sk),
 				  NETLINK_CB(in_skb).portid,
-				  nlh->nlmsg_seq, 0, nlh);
+				  nlh->nlmsg_seq, 0, nlh,
+				  commp->net_admin);
 	release_sock(sk);
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
@@ -310,7 +314,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
 					sk_user_ns(NETLINK_CB(cb->skb).sk),
 					NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq,
-					NLM_F_MULTI, cb->nlh) < 0) {
+					NLM_F_MULTI, cb->nlh,
+					commp->net_admin) < 0) {
 			cb->args[3] = 1;
 			err = 2;
 			goto release;
@@ -320,7 +325,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
 		if (inet_sctp_diag_fill(sk, assoc, skb, r,
 					sk_user_ns(NETLINK_CB(cb->skb).sk),
 					NETLINK_CB(cb->skb).portid,
-					cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
+					cb->nlh->nlmsg_seq, 0, cb->nlh,
+					commp->net_admin) < 0) {
 			err = 2;
 			goto release;
 		}
@@ -375,7 +381,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
 				sk_user_ns(NETLINK_CB(cb->skb).sk),
 				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				cb->nlh) < 0) {
+				cb->nlh, commp->net_admin) < 0) {
 		err = 2;
 		goto out;
 	}
@@ -412,6 +418,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
 		.skb = in_skb,
 		.r = req,
 		.nlh = nlh,
+		.net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
 	};
 
 	if (req->sdiag_family == AF_INET) {
@@ -447,6 +454,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		.skb = skb,
 		.cb = cb,
 		.r = r,
+		.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
 	};
 
 	/* eps hashtable dumps
-- 
cgit v1.2.3


From 8c146bb9d59aa2ac45222171916ece186c4b3943 Mon Sep 17 00:00:00 2001
From: Thomas F Herbert <thomasfherbert@gmail.com>
Date: Wed, 7 Sep 2016 12:56:57 -0400
Subject: openvswitch: 802.1ad uapi changes.

openvswitch: Add support for 8021.AD

Change the description of the VLAN tpid field.

Signed-off-by: Thomas F Herbert <thomasfherbert@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 54c3b4f4aceb..59ed3992c760 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -605,13 +605,13 @@ struct ovs_action_push_mpls {
  * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
  * (but it will not be set in the 802.1Q header that is pushed).
  *
- * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
- * values are those that the kernel module also parses as 802.1Q headers, to
- * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
- * from having surprising results.
+ * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
+ * The only acceptable TPID values are those that the kernel module also parses
+ * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
+ * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
  */
 struct ovs_action_push_vlan {
-	__be16 vlan_tpid;	/* 802.1Q TPID. */
+	__be16 vlan_tpid;	/* 802.1Q or 802.1ad TPID. */
 	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
 };
 
@@ -721,9 +721,10 @@ enum ovs_nat_attr {
  * is copied from the value to the packet header field, rest of the bits are
  * left unchanged.  The non-masked value bits must be passed in as zeroes.
  * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
- * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
- * packet.
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
+ * onto the packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
+ * from the packet.
  * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
  * the nested %OVS_SAMPLE_ATTR_* attributes.
  * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
-- 
cgit v1.2.3


From 34a3d4b2d1f1b7c81af79f6f93a6cef4c3a0f54a Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@tricolour.ca>
Date: Thu, 8 Sep 2016 13:55:56 -0400
Subject: xfrm: fix header file comment reference to struct
 xfrm_replay_state_esn

Reported-by: Paul Wouters <paul@nohats.ca>
Signed-off-by: Richard Guy Briggs <rgb@tricolour.ca>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/uapi/linux/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 143338978b48..1fc62b239f1b 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -298,7 +298,7 @@ enum xfrm_attr_type_t {
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	XFRMA_MARK,		/* struct xfrm_mark */
 	XFRMA_TFCPAD,		/* __u32 */
-	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
+	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_state_esn */
 	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	XFRMA_PROTO,		/* __u8 */
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
-- 
cgit v1.2.3


From e8c24d3a23a469f1f40d4de24d872ca7023ced0a Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 29 Jul 2016 09:30:15 -0700
Subject: x86/pkeys: Allocation/free syscalls

This patch adds two new system calls:

	int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
	int pkey_free(int pkey);

These implement an "allocator" for the protection keys
themselves, which can be thought of as analogous to the allocator
that the kernel has for file descriptors.  The kernel tracks
which numbers are in use, and only allows operations on keys that
are valid.  A key which was not obtained by pkey_alloc() may not,
for instance, be passed to pkey_mprotect().

These system calls are also very important given the kernel's use
of pkeys to implement execute-only support.  These help ensure
that userspace can never assume that it has control of a key
unless it first asks the kernel.  The kernel does not promise to
preserve PKRU (right register) contents except for allocated
pkeys.

The 'init_access_rights' argument to pkey_alloc() specifies the
rights that will be established for the returned pkey.  For
instance:

	pkey = pkey_alloc(flags, PKEY_DENY_WRITE);

will allocate 'pkey', but also sets the bits in PKRU[1] such that
writing to 'pkey' is already denied.

The kernel does not prevent pkey_free() from successfully freeing
in-use pkeys (those still assigned to a memory range by
pkey_mprotect()).  It would be expensive to implement the checks
for this, so we instead say, "Just don't do it" since sane
software will never do it anyway.

Any piece of userspace calling pkey_alloc() needs to be prepared
for it to fail.  Why?  pkey_alloc() returns the same error code
(ENOSPC) when there are no pkeys and when pkeys are unsupported.
They can be unsupported for a whole host of reasons, so apps must
be prepared for this.  Also, libraries or LD_PRELOADs might steal
keys before an application gets access to them.

This allocation mechanism could be implemented in userspace.
Even if we did it in userspace, we would still need additional
user/kernel interfaces to tell userspace which keys are being
used by the kernel internally (such as for execute-only
mappings).  Having the kernel provide this facility completely
removes the need for these additional interfaces, or having an
implementation of this in userspace at all.

Note that we have to make changes to all of the architectures
that do not use mman-common.h because we use the new
PKEY_DENY_ACCESS/WRITE macros in arch-independent code.

1. PKRU is the Protection Key Rights User register.  It is a
   usermode-accessible register that controls whether writes
   and/or access to each individual pkey is allowed or denied.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-arch@vger.kernel.org
Cc: Dave Hansen <dave@sr71.net>
Cc: arnd@arndb.de
Cc: linux-api@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: luto@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20160729163015.444FE75F@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/include/uapi/asm/mman.h     |  5 +++
 arch/mips/include/uapi/asm/mman.h      |  5 +++
 arch/parisc/include/uapi/asm/mman.h    |  5 +++
 arch/x86/include/asm/mmu.h             |  8 ++++
 arch/x86/include/asm/mmu_context.h     | 10 ++++-
 arch/x86/include/asm/pkeys.h           | 73 +++++++++++++++++++++++++++++++---
 arch/x86/kernel/fpu/xstate.c           |  5 ++-
 arch/x86/mm/pkeys.c                    | 38 ++++++++++++++----
 arch/xtensa/include/uapi/asm/mman.h    |  5 +++
 include/linux/pkeys.h                  | 28 ++++++++++---
 include/uapi/asm-generic/mman-common.h |  5 +++
 mm/mprotect.c                          | 61 +++++++++++++++++++++++++---
 12 files changed, 221 insertions(+), 27 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index fec1947b8dbc..02760f6e6ca4 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -78,4 +78,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* __ALPHA_MMAN_H__ */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index ccdcfcbb24aa..655e2fb5395b 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -105,4 +105,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* _ASM_MMAN_H */
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index f3db7d8eb0c2..5979745815a5 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -75,4 +75,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* __PARISC_MMAN_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 1ea0baef1175..72198c64e646 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -23,6 +23,14 @@ typedef struct {
 	const struct vdso_image *vdso_image;	/* vdso image in use */
 
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+	/*
+	 * One bit per protection key says whether userspace can
+	 * use it or not.  protected by mmap_sem.
+	 */
+	u16 pkey_allocation_map;
+	s16 execute_only_pkey;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index af0251fc85ed..8e0a9fe86de4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -108,7 +108,16 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
+		/* pkey 0 is the default and always allocated */
+		mm->context.pkey_allocation_map = 0x1;
+		/* -1 means unallocated or invalid */
+		mm->context.execute_only_pkey = -1;
+	}
+	#endif
 	init_new_context_ldt(tsk, mm);
+
 	return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
@@ -263,5 +272,4 @@ static inline bool arch_pte_access_permitted(pte_t pte, bool write)
 {
 	return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
 }
-
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 666ffc862ef7..b406889de0db 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -1,12 +1,7 @@
 #ifndef _ASM_X86_PKEYS_H
 #define _ASM_X86_PKEYS_H
 
-#define PKEY_DEDICATED_EXECUTE_ONLY 15
-/*
- * Consider the PKEY_DEDICATED_EXECUTE_ONLY key unavailable.
- */
-#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? \
-		PKEY_DEDICATED_EXECUTE_ONLY : 1)
+#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
 
 extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val);
@@ -40,4 +35,70 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
 
+#define mm_pkey_allocation_map(mm)	(mm->context.pkey_allocation_map)
+#define mm_set_pkey_allocated(mm, pkey) do {		\
+	mm_pkey_allocation_map(mm) |= (1U << pkey);	\
+} while (0)
+#define mm_set_pkey_free(mm, pkey) do {			\
+	mm_pkey_allocation_map(mm) &= ~(1U << pkey);	\
+} while (0)
+
+static inline
+bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+	return mm_pkey_allocation_map(mm) & (1U << pkey);
+}
+
+/*
+ * Returns a positive, 4-bit key on success, or -1 on failure.
+ */
+static inline
+int mm_pkey_alloc(struct mm_struct *mm)
+{
+	/*
+	 * Note: this is the one and only place we make sure
+	 * that the pkey is valid as far as the hardware is
+	 * concerned.  The rest of the kernel trusts that
+	 * only good, valid pkeys come out of here.
+	 */
+	u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1);
+	int ret;
+
+	/*
+	 * Are we out of pkeys?  We must handle this specially
+	 * because ffz() behavior is undefined if there are no
+	 * zeros.
+	 */
+	if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+		return -1;
+
+	ret = ffz(mm_pkey_allocation_map(mm));
+
+	mm_set_pkey_allocated(mm, ret);
+
+	return ret;
+}
+
+static inline
+int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+	/*
+	 * pkey 0 is special, always allocated and can never
+	 * be freed.
+	 */
+	if (!pkey)
+		return -EINVAL;
+	if (!mm_pkey_is_allocated(mm, pkey))
+		return -EINVAL;
+
+	mm_set_pkey_free(mm, pkey);
+
+	return 0;
+}
+
+extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+		unsigned long init_val);
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+		unsigned long init_val);
+
 #endif /*_ASM_X86_PKEYS_H */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 01567aa87503..124aa5c593f8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -5,6 +5,7 @@
  */
 #include <linux/compat.h>
 #include <linux/cpu.h>
+#include <linux/mman.h>
 #include <linux/pkeys.h>
 
 #include <asm/fpu/api.h>
@@ -866,9 +867,10 @@ const void *get_xsave_field_ptr(int xsave_state)
 	return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
+#ifdef CONFIG_ARCH_HAS_PKEYS
+
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
-
 /*
  * This will go out and modify PKRU register to set the access
  * rights for @pkey to @init_val.
@@ -914,6 +916,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 
 	return 0;
 }
+#endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e8c474451928..e6113bbb56e1 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -21,8 +21,19 @@
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
+	bool need_to_set_mm_pkey = false;
+	int execute_only_pkey = mm->context.execute_only_pkey;
 	int ret;
 
+	/* Do we need to assign a pkey for mm's execute-only maps? */
+	if (execute_only_pkey == -1) {
+		/* Go allocate one to use, which might fail */
+		execute_only_pkey = mm_pkey_alloc(mm);
+		if (execute_only_pkey < 0)
+			return -1;
+		need_to_set_mm_pkey = true;
+	}
+
 	/*
 	 * We do not want to go through the relatively costly
 	 * dance to set PKRU if we do not need to.  Check it
@@ -32,22 +43,33 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 * can make fpregs inactive.
 	 */
 	preempt_disable();
-	if (fpregs_active() &&
-	    !__pkru_allows_read(read_pkru(), PKEY_DEDICATED_EXECUTE_ONLY)) {
+	if (!need_to_set_mm_pkey &&
+	    fpregs_active() &&
+	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
-		return PKEY_DEDICATED_EXECUTE_ONLY;
+		return execute_only_pkey;
 	}
 	preempt_enable();
-	ret = arch_set_user_pkey_access(current, PKEY_DEDICATED_EXECUTE_ONLY,
+
+	/*
+	 * Set up PKRU so that it denies access for everything
+	 * other than execution.
+	 */
+	ret = arch_set_user_pkey_access(current, execute_only_pkey,
 			PKEY_DISABLE_ACCESS);
 	/*
 	 * If the PKRU-set operation failed somehow, just return
 	 * 0 and effectively disable execute-only support.
 	 */
-	if (ret)
-		return 0;
+	if (ret) {
+		mm_set_pkey_free(mm, execute_only_pkey);
+		return -1;
+	}
 
-	return PKEY_DEDICATED_EXECUTE_ONLY;
+	/* We got one, store it and use it from here on out */
+	if (need_to_set_mm_pkey)
+		mm->context.execute_only_pkey = execute_only_pkey;
+	return execute_only_pkey;
 }
 
 static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
@@ -55,7 +77,7 @@ static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
 	/* Do this check first since the vm_flags should be hot */
 	if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
 		return false;
-	if (vma_pkey(vma) != PKEY_DEDICATED_EXECUTE_ONLY)
+	if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
 		return false;
 
 	return true;
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 9e079d49e7f2..24365b30aae9 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -117,4 +117,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* _XTENSA_MMAN_H */
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 6899b0bc7ce0..8ff21125dc8a 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -4,11 +4,6 @@
 #include <linux/mm_types.h>
 #include <asm/mmu_context.h>
 
-#define PKEY_DISABLE_ACCESS	0x1
-#define PKEY_DISABLE_WRITE	0x2
-#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
-				 PKEY_DISABLE_WRITE)
-
 #ifdef CONFIG_ARCH_HAS_PKEYS
 #include <asm/pkeys.h>
 #else /* ! CONFIG_ARCH_HAS_PKEYS */
@@ -17,6 +12,29 @@
 #define arch_override_mprotect_pkey(vma, prot, pkey) (0)
 #define PKEY_DEDICATED_EXECUTE_ONLY 0
 #define ARCH_VM_PKEY_FLAGS 0
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+	return (pkey == 0);
+}
+
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+	return -1;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+	WARN_ONCE(1, "free of protection key when disabled");
+	return -EINVAL;
+}
+
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+			unsigned long init_val)
+{
+	return 0;
+}
+
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
 #endif /* _LINUX_PKEYS_H */
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 58274382a616..8c27db0c5c08 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -72,4 +72,9 @@
 #define MAP_HUGE_SHIFT	26
 #define MAP_HUGE_MASK	0x3f
 
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
 #endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abd9c8257b2e..7b35ee3894ee 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,11 +23,13 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/perf_event.h>
+#include <linux/pkeys.h>
 #include <linux/ksm.h>
 #include <linux/pkeys.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
 #include "internal.h"
@@ -364,12 +366,6 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
 	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
 				(prot & PROT_READ);
-	/*
-	 * A temporary safety check since we are not validating
-	 * the pkey before we introduce the allocation code.
-	 */
-	if (pkey != -1)
-		return -EINVAL;
 
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
 	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
@@ -391,6 +387,14 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 	if (down_write_killable(&current->mm->mmap_sem))
 		return -EINTR;
 
+	/*
+	 * If userspace did not allocate the pkey, do not let
+	 * them use it here.
+	 */
+	error = -EINVAL;
+	if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
+		goto out;
+
 	vma = find_vma(current->mm, start);
 	error = -ENOMEM;
 	if (!vma)
@@ -485,3 +489,48 @@ SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
 {
 	return do_mprotect_pkey(start, len, prot, pkey);
 }
+
+SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
+{
+	int pkey;
+	int ret;
+
+	/* No flags supported yet. */
+	if (flags)
+		return -EINVAL;
+	/* check for unsupported init values */
+	if (init_val & ~PKEY_ACCESS_MASK)
+		return -EINVAL;
+
+	down_write(&current->mm->mmap_sem);
+	pkey = mm_pkey_alloc(current->mm);
+
+	ret = -ENOSPC;
+	if (pkey == -1)
+		goto out;
+
+	ret = arch_set_user_pkey_access(current, pkey, init_val);
+	if (ret) {
+		mm_pkey_free(current->mm, pkey);
+		goto out;
+	}
+	ret = pkey;
+out:
+	up_write(&current->mm->mmap_sem);
+	return ret;
+}
+
+SYSCALL_DEFINE1(pkey_free, int, pkey)
+{
+	int ret;
+
+	down_write(&current->mm->mmap_sem);
+	ret = mm_pkey_free(current->mm, pkey);
+	up_write(&current->mm->mmap_sem);
+
+	/*
+	 * We could provie warnings or errors if any VMA still
+	 * has the pkey set here.
+	 */
+	return ret;
+}
-- 
cgit v1.2.3


From a60f7b69d92c0142c80a30d669a76b617b7f6879 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 29 Jul 2016 09:30:18 -0700
Subject: generic syscalls: Wire up memory protection keys syscalls

These new syscalls are implemented as generic code, so enable them for
architectures like arm64 which use the generic syscall table.

According to Arnd:

  Even if the support is x86 specific for the forseeable future, it may be
  good to reserve the number just in case.  The other architecture specific
  syscall lists are usually left to the individual arch maintainers, most a
  lot of the newer architectures share this table.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-arch@vger.kernel.org
Cc: Dave Hansen <dave@sr71.net>
Cc: mgorman@techsingularity.net
Cc: linux-api@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: luto@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20160729163018.505A6875@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/syscalls.h          |  8 ++++++++
 include/uapi/asm-generic/unistd.h | 12 +++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d02239022bd0..0d7abb8b7315 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -898,4 +898,12 @@ asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
 
 asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
 
+asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len,
+				  unsigned long prot, int pkey);
+asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
+asmlinkage long sys_pkey_free(int pkey);
+//asmlinkage long sys_pkey_get(int pkey, unsigned long flags);
+//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights,
+//			     unsigned long flags);
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index a26415b5151c..dbfee7e86ba6 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -724,9 +724,19 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
 __SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
 #define __NR_pwritev2 287
 __SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
+#define __NR_pkey_mprotect 288
+__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
+#define __NR_pkey_alloc 289
+__SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
+#define __NR_pkey_free 290
+__SYSCALL(__NR_pkey_free,     sys_pkey_free)
+#define __NR_pkey_get 291
+//__SYSCALL(__NR_pkey_get,      sys_pkey_get)
+#define __NR_pkey_set 292
+//__SYSCALL(__NR_pkey_set,      sys_pkey_set)
 
 #undef __NR_syscalls
-#define __NR_syscalls 288
+#define __NR_syscalls 291
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.2.3


From 848d10314bfe1be79f5ca4deb3d2bf81f6adcf38 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Date: Wed, 31 Aug 2016 16:34:07 -0300
Subject: [media] fix broken references on dvb/video*rst

Trivially fix those broken references, by copying the structs
fron the header, just like other API documentation at the
DVB side.

This doesn't have the level of quality used at the V4L2 side
of the API, but, as this documents a deprecated API, used
only by av7110 driver, it doesn't make much sense to invest
time making it better.

Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/dvb/video-command.rst     | 30 ++++++++++++++++++++++
 Documentation/media/uapi/dvb/video-get-event.rst   | 17 ++++++++++++
 Documentation/media/uapi/dvb/video-get-navi.rst    | 10 +++++++-
 Documentation/media/uapi/dvb/video-get-size.rst    | 10 ++++++++
 Documentation/media/uapi/dvb/video-get-status.rst  | 11 ++++++++
 .../media/uapi/dvb/video-select-source.rst         | 10 ++++++++
 .../media/uapi/dvb/video-set-attributes.rst        | 16 ++++++++++++
 .../media/uapi/dvb/video-set-display-format.rst    |  2 +-
 Documentation/media/uapi/dvb/video-set-format.rst  |  9 +++++++
 .../media/uapi/dvb/video-set-highlight.rst         | 26 ++++++++++++++++++-
 .../media/uapi/dvb/video-set-spu-palette.rst       | 10 +++++++-
 Documentation/media/uapi/dvb/video-set-spu.rst     | 11 +++++++-
 include/uapi/linux/dvb/video.h                     |  3 ++-
 13 files changed, 159 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/media/uapi/dvb/video-command.rst b/Documentation/media/uapi/dvb/video-command.rst
index 4772562036f1..536d0fdd8399 100644
--- a/Documentation/media/uapi/dvb/video-command.rst
+++ b/Documentation/media/uapi/dvb/video-command.rst
@@ -59,6 +59,36 @@ subset of the ``v4l2_decoder_cmd`` struct, so refer to the
 :ref:`VIDIOC_DECODER_CMD` documentation for
 more information.
 
+.. c:type:: struct video_command
+
+.. code-block:: c
+
+	/* The structure must be zeroed before use by the application
+	This ensures it can be extended safely in the future. */
+	struct video_command {
+		__u32 cmd;
+		__u32 flags;
+		union {
+			struct {
+				__u64 pts;
+			} stop;
+
+			struct {
+				/* 0 or 1000 specifies normal speed,
+				1 specifies forward single stepping,
+				-1 specifies backward single stepping,
+				>1: playback at speed/1000 of the normal speed,
+				<-1: reverse playback at (-speed/1000) of the normal speed. */
+				__s32 speed;
+				__u32 format;
+			} play;
+
+			struct {
+				__u32 data[16];
+			} raw;
+		};
+	};
+
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-get-event.rst b/Documentation/media/uapi/dvb/video-get-event.rst
index 8c0c622c380b..6ad14cdb894a 100644
--- a/Documentation/media/uapi/dvb/video-get-event.rst
+++ b/Documentation/media/uapi/dvb/video-get-event.rst
@@ -64,6 +64,23 @@ included in the exceptfds argument, and for poll(), POLLPRI should be
 specified as the wake-up condition. Read-only permissions are sufficient
 for this ioctl call.
 
+.. c:type:: video_event
+
+.. code-block:: c
+
+	struct video_event {
+		__s32 type;
+	#define VIDEO_EVENT_SIZE_CHANGED	1
+	#define VIDEO_EVENT_FRAME_RATE_CHANGED	2
+	#define VIDEO_EVENT_DECODER_STOPPED 	3
+	#define VIDEO_EVENT_VSYNC 		4
+		__kernel_time_t timestamp;
+		union {
+			video_size_t size;
+			unsigned int frame_rate;	/* in frames per 1000sec */
+			unsigned char vsync_field;	/* unknown/odd/even/progressive */
+		} u;
+	};
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-get-navi.rst b/Documentation/media/uapi/dvb/video-get-navi.rst
index b8de9ccf38c2..114a9ac48b9e 100644
--- a/Documentation/media/uapi/dvb/video-get-navi.rst
+++ b/Documentation/media/uapi/dvb/video-get-navi.rst
@@ -16,7 +16,7 @@ VIDEO_GET_NAVI
 Synopsis
 --------
 
-.. c:function:: int ioctl(fd, VIDEO_GET_NAVI , video_navi_pack_t *navipack)
+.. c:function:: int ioctl(fd, VIDEO_GET_NAVI , struct video_navi_pack *navipack)
     :name: VIDEO_GET_NAVI
 
 
@@ -54,6 +54,14 @@ This ioctl returns navigational information from the DVD stream. This is
 especially needed if an encoded stream has to be decoded by the
 hardware.
 
+.. c:type:: video_navi_pack
+
+.. code-block::c
+
+	typedef struct video_navi_pack {
+		int length;          /* 0 ... 1024 */
+		__u8 data[1024];
+	} video_navi_pack_t;
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-get-size.rst b/Documentation/media/uapi/dvb/video-get-size.rst
index ce8b4c6b41a5..d077fe2305a0 100644
--- a/Documentation/media/uapi/dvb/video-get-size.rst
+++ b/Documentation/media/uapi/dvb/video-get-size.rst
@@ -52,6 +52,16 @@ Description
 
 This ioctl returns the size and aspect ratio.
 
+.. c:type:: video_size_t
+
+.. code-block::c
+
+	typedef struct {
+		int w;
+		int h;
+		video_format_t aspect_ratio;
+	} video_size_t;
+
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-get-status.rst b/Documentation/media/uapi/dvb/video-get-status.rst
index 7b6a278b5246..ed6ea19827a6 100644
--- a/Documentation/media/uapi/dvb/video-get-status.rst
+++ b/Documentation/media/uapi/dvb/video-get-status.rst
@@ -53,6 +53,17 @@ Description
 This ioctl call asks the Video Device to return the current status of
 the device.
 
+.. c:type:: video_status
+
+.. code-block:: c
+
+	struct video_status {
+		int                   video_blank;   /* blank video on freeze? */
+		video_play_state_t    play_state;    /* current state of playback */
+		video_stream_source_t stream_source; /* current source (demux/memory) */
+		video_format_t        video_format;  /* current aspect ratio of stream*/
+		video_displayformat_t display_format;/* selected cropping mode */
+	};
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-select-source.rst b/Documentation/media/uapi/dvb/video-select-source.rst
index eaa1088f07da..2f4fbf4b490c 100644
--- a/Documentation/media/uapi/dvb/video-select-source.rst
+++ b/Documentation/media/uapi/dvb/video-select-source.rst
@@ -58,6 +58,16 @@ This ioctl call informs the video device which source shall be used for
 the input data. The possible sources are demux or memory. If memory is
 selected, the data is fed to the video device through the write command.
 
+.. c:type:: video_stream_source_t
+
+.. code-block:: c
+
+	typedef enum {
+		VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */
+		VIDEO_SOURCE_MEMORY /* If this source is selected, the stream
+				comes from the user through the write
+				system call */
+	} video_stream_source_t;
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-set-attributes.rst b/Documentation/media/uapi/dvb/video-set-attributes.rst
index 8901520d7e43..b2f11a6746e9 100644
--- a/Documentation/media/uapi/dvb/video-set-attributes.rst
+++ b/Documentation/media/uapi/dvb/video-set-attributes.rst
@@ -55,6 +55,22 @@ information about the stream. Some hardware may not need this
 information, but the call also tells the hardware to prepare for DVD
 playback.
 
+.. c:type:: video_attributes_t
+
+.. code-block::c
+
+	typedef __u16 video_attributes_t;
+	/*   bits: descr. */
+	/*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
+	/*   13-12 TV system (0=525/60, 1=625/50) */
+	/*   11-10 Aspect ratio (0=4:3, 3=16:9) */
+	/*    9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */
+	/*    7    line 21-1 data present in GOP (1=yes, 0=no) */
+	/*    6    line 21-2 data present in GOP (1=yes, 0=no) */
+	/*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
+	/*    2    source letterboxed (1=yes, 0=no) */
+	/*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
+
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-set-display-format.rst b/Documentation/media/uapi/dvb/video-set-display-format.rst
index 6abf19479939..2ef7401781be 100644
--- a/Documentation/media/uapi/dvb/video-set-display-format.rst
+++ b/Documentation/media/uapi/dvb/video-set-display-format.rst
@@ -16,7 +16,7 @@ VIDEO_SET_DISPLAY_FORMAT
 Synopsis
 --------
 
-.. c:function:: int ioctl(fd, VIDEO_SET_DISPLAY_FORMAT, video_display_format_t format)
+.. c:function:: int ioctl(fd, VIDEO_SET_DISPLAY_FORMAT)
     :name: VIDEO_SET_DISPLAY_FORMAT
 
 
diff --git a/Documentation/media/uapi/dvb/video-set-format.rst b/Documentation/media/uapi/dvb/video-set-format.rst
index 117618525538..4239a4e365bb 100644
--- a/Documentation/media/uapi/dvb/video-set-format.rst
+++ b/Documentation/media/uapi/dvb/video-set-format.rst
@@ -54,6 +54,15 @@ This ioctl sets the screen format (aspect ratio) of the connected output
 device (TV) so that the output of the decoder can be adjusted
 accordingly.
 
+.. c:type:: video_format_t
+
+.. code-block:: c
+
+	typedef enum {
+		VIDEO_FORMAT_4_3,     /* Select 4:3 format */
+		VIDEO_FORMAT_16_9,    /* Select 16:9 format. */
+		VIDEO_FORMAT_221_1    /* 2.21:1 */
+	} video_format_t;
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-set-highlight.rst b/Documentation/media/uapi/dvb/video-set-highlight.rst
index d93b69eef15b..90aeafd923b7 100644
--- a/Documentation/media/uapi/dvb/video-set-highlight.rst
+++ b/Documentation/media/uapi/dvb/video-set-highlight.rst
@@ -16,7 +16,7 @@ VIDEO_SET_HIGHLIGHT
 Synopsis
 --------
 
-.. c:function:: int ioctl(fd, VIDEO_SET_HIGHLIGHT ,video_highlight_t *vhilite)
+.. c:function:: int ioctl(fd, VIDEO_SET_HIGHLIGHT, struct video_highlight *vhilite)
     :name: VIDEO_SET_HIGHLIGHT
 
 
@@ -53,6 +53,30 @@ Description
 This ioctl sets the SPU highlight information for the menu access of a
 DVD.
 
+.. c:type:: video_highlight
+
+.. code-block:: c
+
+	typedef
+	struct video_highlight {
+		int     active;      /*    1=show highlight, 0=hide highlight */
+		__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
+				/*    3- 0  Background pixel contrast */
+		__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
+				/*    3- 0  Emphasis pixel-1 contrast */
+		__u8    color1;      /*    7- 4  Pattern pixel color */
+				/*    3- 0  Background pixel color */
+		__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
+				/*    3- 0  Emphasis pixel-1 color */
+		__u32    ypos;       /*   23-22  auto action mode */
+				/*   21-12  start y */
+				/*    9- 0  end y */
+		__u32    xpos;       /*   23-22  button color number */
+				/*   21-12  start x */
+				/*    9- 0  end x */
+	} video_highlight_t;
+
+
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-set-spu-palette.rst b/Documentation/media/uapi/dvb/video-set-spu-palette.rst
index b24f7882089a..51a1913d21d2 100644
--- a/Documentation/media/uapi/dvb/video-set-spu-palette.rst
+++ b/Documentation/media/uapi/dvb/video-set-spu-palette.rst
@@ -16,7 +16,7 @@ VIDEO_SET_SPU_PALETTE
 Synopsis
 --------
 
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU_PALETTE, video_spu_palette_t *palette )
+.. c:function:: int ioctl(fd, VIDEO_SET_SPU_PALETTE, struct video_spu_palette *palette )
     :name: VIDEO_SET_SPU_PALETTE
 
 
@@ -52,6 +52,14 @@ Description
 
 This ioctl sets the SPU color palette.
 
+.. c:type:: video_spu_palette
+
+.. code-block::c
+
+	typedef struct video_spu_palette {      /* SPU Palette information */
+		int length;
+		__u8 __user *palette;
+	} video_spu_palette_t;
 
 Return Value
 ------------
diff --git a/Documentation/media/uapi/dvb/video-set-spu.rst b/Documentation/media/uapi/dvb/video-set-spu.rst
index 2a7f0625de38..739e5e7bd133 100644
--- a/Documentation/media/uapi/dvb/video-set-spu.rst
+++ b/Documentation/media/uapi/dvb/video-set-spu.rst
@@ -16,7 +16,7 @@ VIDEO_SET_SPU
 Synopsis
 --------
 
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU , video_spu_t *spu)
+.. c:function:: int ioctl(fd, VIDEO_SET_SPU , struct video_spu *spu)
     :name: VIDEO_SET_SPU
 
 
@@ -54,6 +54,15 @@ Description
 This ioctl activates or deactivates SPU decoding in a DVD input stream.
 It can only be used, if the driver is able to handle a DVD stream.
 
+.. c:type:: struct video_spu
+
+.. code-block:: c
+
+	typedef struct video_spu {
+		int active;
+		int stream_id;
+	} video_spu_t;
+
 
 Return Value
 ------------
diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h
index 49392564f9d6..260f033a5b54 100644
--- a/include/uapi/linux/dvb/video.h
+++ b/include/uapi/linux/dvb/video.h
@@ -206,7 +206,8 @@ typedef __u16 video_attributes_t;
 /*    6    line 21-2 data present in GOP (1=yes, 0=no) */
 /*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
 /*    2    source letterboxed (1=yes, 0=no) */
-/*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
+/*    0    film/camera mode (0=
+ *camera, 1=film (625/50 only)) */
 
 
 /* bit definitions for capabilities: */
-- 
cgit v1.2.3


From cfe41359942b933ee0058c73df7dcc00dae64885 Mon Sep 17 00:00:00 2001
From: Jouni Ukkonen <jouni.ukkonen@intel.com>
Date: Mon, 25 Apr 2016 09:19:10 -0300
Subject: [media] media: Add 1X14 14-bit raw bayer media bus code definitions

The codes will be called:

	MEDIA_BUS_FMT_SBGGR14_1X14
	MEDIA_BUS_FMT_SGBRG14_1X14
	MEDIA_BUS_FMT_SGRBG14_1X14
	MEDIA_BUS_FMT_SRGGB14_1X14

Signed-off-by: Jouni Ukkonen <jouni.ukkonen@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/v4l/subdev-formats.rst | 258 +++++++++++++++++++++++-
 include/uapi/linux/media-bus-format.h           |   6 +-
 2 files changed, 262 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/media/uapi/v4l/subdev-formats.rst b/Documentation/media/uapi/v4l/subdev-formats.rst
index 568d5dd56561..273ca484983d 100644
--- a/Documentation/media/uapi/v4l/subdev-formats.rst
+++ b/Documentation/media/uapi/v4l/subdev-formats.rst
@@ -2812,7 +2812,7 @@ organization is given as an example for the first pixel only.
        -  Code
 
        -
-       -  :cspan:`11` Data organization
+       -  :cspan:`13` Data organization
 
     -  .. row 2
 
@@ -2820,6 +2820,10 @@ organization is given as an example for the first pixel only.
        -
        -  Bit
 
+       -  13
+
+       -  12
+
        -  11
 
        -  10
@@ -2859,6 +2863,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -2890,6 +2898,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -2921,6 +2933,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -2952,6 +2968,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -2983,6 +3003,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3014,6 +3038,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3045,6 +3073,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3076,6 +3108,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -3107,6 +3143,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3138,6 +3178,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3169,6 +3213,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3200,6 +3248,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -3231,6 +3283,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  0
 
        -  0
@@ -3260,6 +3316,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3291,6 +3351,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3320,6 +3384,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  0
 
        -  0
@@ -3351,6 +3419,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3380,6 +3452,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`1`
 
        -  b\ :sub:`0`
@@ -3411,6 +3487,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`1`
 
        -  b\ :sub:`0`
@@ -3440,6 +3520,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3467,6 +3551,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3498,6 +3586,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`9`
 
        -  g\ :sub:`8`
@@ -3529,6 +3621,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`9`
 
        -  g\ :sub:`8`
@@ -3560,6 +3656,10 @@ organization is given as an example for the first pixel only.
 
        -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`9`
 
        -  r\ :sub:`8`
@@ -3587,6 +3687,10 @@ organization is given as an example for the first pixel only.
        -  0x3008
 
        -
+       -  -
+
+       -  -
+
        -  b\ :sub:`11`
 
        -  b\ :sub:`10`
@@ -3618,6 +3722,10 @@ organization is given as an example for the first pixel only.
        -  0x3010
 
        -
+       -  -
+
+       -  -
+
        -  g\ :sub:`11`
 
        -  g\ :sub:`10`
@@ -3649,6 +3757,10 @@ organization is given as an example for the first pixel only.
        -  0x3011
 
        -
+       -  -
+
+       -  -
+
        -  g\ :sub:`11`
 
        -  g\ :sub:`10`
@@ -3680,6 +3792,150 @@ organization is given as an example for the first pixel only.
        -  0x3012
 
        -
+       -  -
+
+       -  -
+
+       -  r\ :sub:`11`
+
+       -  r\ :sub:`10`
+
+       -  r\ :sub:`9`
+
+       -  r\ :sub:`8`
+
+       -  r\ :sub:`7`
+
+       -  r\ :sub:`6`
+
+       -  r\ :sub:`5`
+
+       -  r\ :sub:`4`
+
+       -  r\ :sub:`3`
+
+       -  r\ :sub:`2`
+
+       -  r\ :sub:`1`
+
+       -  r\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SBGGR14-1X14:
+
+       -  MEDIA_BUS_FMT_SBGGR14_1X14
+
+       -  0x3019
+
+       -
+       -  b\ :sub:`13`
+
+       -  b\ :sub:`12`
+
+       -  b\ :sub:`11`
+
+       -  b\ :sub:`10`
+
+       -  b\ :sub:`9`
+
+       -  b\ :sub:`8`
+
+       -  b\ :sub:`7`
+
+       -  b\ :sub:`6`
+
+       -  b\ :sub:`5`
+
+       -  b\ :sub:`4`
+
+       -  b\ :sub:`3`
+
+       -  b\ :sub:`2`
+
+       -  b\ :sub:`1`
+
+       -  b\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SGBRG14-1X14:
+
+       -  MEDIA_BUS_FMT_SGBRG14_1X14
+
+       -  0x301a
+
+       -
+       -  g\ :sub:`13`
+
+       -  g\ :sub:`12`
+
+       -  g\ :sub:`11`
+
+       -  g\ :sub:`10`
+
+       -  g\ :sub:`9`
+
+       -  g\ :sub:`8`
+
+       -  g\ :sub:`7`
+
+       -  g\ :sub:`6`
+
+       -  g\ :sub:`5`
+
+       -  g\ :sub:`4`
+
+       -  g\ :sub:`3`
+
+       -  g\ :sub:`2`
+
+       -  g\ :sub:`1`
+
+       -  g\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SGRBG14-1X14:
+
+       -  MEDIA_BUS_FMT_SGRBG14_1X14
+
+       -  0x301b
+
+       -
+       -  g\ :sub:`13`
+
+       -  g\ :sub:`12`
+
+       -  g\ :sub:`11`
+
+       -  g\ :sub:`10`
+
+       -  g\ :sub:`9`
+
+       -  g\ :sub:`8`
+
+       -  g\ :sub:`7`
+
+       -  g\ :sub:`6`
+
+       -  g\ :sub:`5`
+
+       -  g\ :sub:`4`
+
+       -  g\ :sub:`3`
+
+       -  g\ :sub:`2`
+
+       -  g\ :sub:`1`
+
+       -  g\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SRGGB14-1X14:
+
+       -  MEDIA_BUS_FMT_SRGGB14_1X14
+
+       -  0x301c
+
+       -
+       -  r\ :sub:`13`
+
+       -  r\ :sub:`12`
+
        -  r\ :sub:`11`
 
        -  r\ :sub:`10`
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 190d491d5b13..1dff459ab6ff 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -97,7 +97,7 @@
 #define MEDIA_BUS_FMT_YUV10_1X30		0x2016
 #define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
 
-/* Bayer - next is	0x3019 */
+/* Bayer - next is	0x301d */
 #define MEDIA_BUS_FMT_SBGGR8_1X8		0x3001
 #define MEDIA_BUS_FMT_SGBRG8_1X8		0x3013
 #define MEDIA_BUS_FMT_SGRBG8_1X8		0x3002
@@ -122,6 +122,10 @@
 #define MEDIA_BUS_FMT_SGBRG12_1X12		0x3010
 #define MEDIA_BUS_FMT_SGRBG12_1X12		0x3011
 #define MEDIA_BUS_FMT_SRGGB12_1X12		0x3012
+#define MEDIA_BUS_FMT_SBGGR14_1X14		0x3019
+#define MEDIA_BUS_FMT_SGBRG14_1X14		0x301a
+#define MEDIA_BUS_FMT_SGRBG14_1X14		0x301b
+#define MEDIA_BUS_FMT_SRGGB14_1X14		0x301c
 
 /* JPEG compressed formats - next is	0x4002 */
 #define MEDIA_BUS_FMT_JPEG_1X8			0x4001
-- 
cgit v1.2.3


From 82dec0a7db16f7691470b58c28e79a2f65e829c4 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 27 Jun 2016 11:15:57 -0300
Subject: [media] media: Add 1X16 16-bit raw bayer media bus code definitions

The codes will be called:

	MEDIA_BUS_FMT_SBGGR16_1X16
	MEDIA_BUS_FMT_SGBRG16_1X16
	MEDIA_BUS_FMT_SGRBG16_1X16
	MEDIA_BUS_FMT_SRGGB16_1X16

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/v4l/subdev-formats.rst | 290 +++++++++++++++++++++++-
 include/uapi/linux/media-bus-format.h           |   6 +-
 2 files changed, 294 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/media/uapi/v4l/subdev-formats.rst b/Documentation/media/uapi/v4l/subdev-formats.rst
index 273ca484983d..08d6532b0a26 100644
--- a/Documentation/media/uapi/v4l/subdev-formats.rst
+++ b/Documentation/media/uapi/v4l/subdev-formats.rst
@@ -2812,7 +2812,7 @@ organization is given as an example for the first pixel only.
        -  Code
 
        -
-       -  :cspan:`13` Data organization
+       -  :cspan:`15` Data organization
 
     -  .. row 2
 
@@ -2820,6 +2820,10 @@ organization is given as an example for the first pixel only.
        -
        -  Bit
 
+       -  15
+
+       -  14
+
        -  13
 
        -  12
@@ -2867,6 +2871,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -2902,6 +2910,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -2937,6 +2949,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -2972,6 +2988,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -3007,6 +3027,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3042,6 +3066,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3077,6 +3105,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3112,6 +3144,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -3147,6 +3183,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3182,6 +3222,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3217,6 +3261,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`7`
 
        -  g\ :sub:`6`
@@ -3252,6 +3300,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`7`
 
        -  r\ :sub:`6`
@@ -3287,6 +3339,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  0
 
        -  0
@@ -3320,6 +3376,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3355,6 +3415,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`7`
 
        -  b\ :sub:`6`
@@ -3388,6 +3452,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  0
 
        -  0
@@ -3423,6 +3491,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3456,6 +3528,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`1`
 
        -  b\ :sub:`0`
@@ -3491,6 +3567,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`1`
 
        -  b\ :sub:`0`
@@ -3524,6 +3604,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3555,6 +3639,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`9`
 
        -  b\ :sub:`8`
@@ -3590,6 +3678,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`9`
 
        -  g\ :sub:`8`
@@ -3625,6 +3717,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`9`
 
        -  g\ :sub:`8`
@@ -3660,6 +3756,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`9`
 
        -  r\ :sub:`8`
@@ -3691,6 +3791,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  b\ :sub:`11`
 
        -  b\ :sub:`10`
@@ -3726,6 +3830,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`11`
 
        -  g\ :sub:`10`
@@ -3761,6 +3869,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  g\ :sub:`11`
 
        -  g\ :sub:`10`
@@ -3796,6 +3908,10 @@ organization is given as an example for the first pixel only.
 
        -  -
 
+       -  -
+
+       -  -
+
        -  r\ :sub:`11`
 
        -  r\ :sub:`10`
@@ -3827,6 +3943,10 @@ organization is given as an example for the first pixel only.
        -  0x3019
 
        -
+       -  -
+
+       -  -
+
        -  b\ :sub:`13`
 
        -  b\ :sub:`12`
@@ -3862,6 +3982,10 @@ organization is given as an example for the first pixel only.
        -  0x301a
 
        -
+       -  -
+
+       -  -
+
        -  g\ :sub:`13`
 
        -  g\ :sub:`12`
@@ -3897,6 +4021,10 @@ organization is given as an example for the first pixel only.
        -  0x301b
 
        -
+       -  -
+
+       -  -
+
        -  g\ :sub:`13`
 
        -  g\ :sub:`12`
@@ -3932,6 +4060,166 @@ organization is given as an example for the first pixel only.
        -  0x301c
 
        -
+       -  -
+
+       -  -
+
+       -  r\ :sub:`13`
+
+       -  r\ :sub:`12`
+
+       -  r\ :sub:`11`
+
+       -  r\ :sub:`10`
+
+       -  r\ :sub:`9`
+
+       -  r\ :sub:`8`
+
+       -  r\ :sub:`7`
+
+       -  r\ :sub:`6`
+
+       -  r\ :sub:`5`
+
+       -  r\ :sub:`4`
+
+       -  r\ :sub:`3`
+
+       -  r\ :sub:`2`
+
+       -  r\ :sub:`1`
+
+       -  r\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SBGGR16-1X16:
+
+       -  MEDIA_BUS_FMT_SBGGR16_1X16
+
+       -  0x301d
+
+       -
+       -  b\ :sub:`15`
+
+       -  b\ :sub:`14`
+
+       -  b\ :sub:`13`
+
+       -  b\ :sub:`12`
+
+       -  b\ :sub:`11`
+
+       -  b\ :sub:`10`
+
+       -  b\ :sub:`9`
+
+       -  b\ :sub:`8`
+
+       -  b\ :sub:`7`
+
+       -  b\ :sub:`6`
+
+       -  b\ :sub:`5`
+
+       -  b\ :sub:`4`
+
+       -  b\ :sub:`3`
+
+       -  b\ :sub:`2`
+
+       -  b\ :sub:`1`
+
+       -  b\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SGBRG16-1X16:
+
+       -  MEDIA_BUS_FMT_SGBRG16_1X16
+
+       -  0x301e
+
+       -
+       -  g\ :sub:`15`
+
+       -  g\ :sub:`14`
+
+       -  g\ :sub:`13`
+
+       -  g\ :sub:`12`
+
+       -  g\ :sub:`11`
+
+       -  g\ :sub:`10`
+
+       -  g\ :sub:`9`
+
+       -  g\ :sub:`8`
+
+       -  g\ :sub:`7`
+
+       -  g\ :sub:`6`
+
+       -  g\ :sub:`5`
+
+       -  g\ :sub:`4`
+
+       -  g\ :sub:`3`
+
+       -  g\ :sub:`2`
+
+       -  g\ :sub:`1`
+
+       -  g\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SGRBG16-1X16:
+
+       -  MEDIA_BUS_FMT_SGRBG16_1X16
+
+       -  0x301f
+
+       -
+       -  g\ :sub:`15`
+
+       -  g\ :sub:`14`
+
+       -  g\ :sub:`13`
+
+       -  g\ :sub:`12`
+
+       -  g\ :sub:`11`
+
+       -  g\ :sub:`10`
+
+       -  g\ :sub:`9`
+
+       -  g\ :sub:`8`
+
+       -  g\ :sub:`7`
+
+       -  g\ :sub:`6`
+
+       -  g\ :sub:`5`
+
+       -  g\ :sub:`4`
+
+       -  g\ :sub:`3`
+
+       -  g\ :sub:`2`
+
+       -  g\ :sub:`1`
+
+       -  g\ :sub:`0`
+
+    -  .. _MEDIA-BUS-FMT-SRGGB16-1X16:
+
+       -  MEDIA_BUS_FMT_SRGGB16_1X16
+
+       -  0x3020
+
+       -
+       -  r\ :sub:`15`
+
+       -  r\ :sub:`14`
+
        -  r\ :sub:`13`
 
        -  r\ :sub:`12`
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 1dff459ab6ff..2168759c1287 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -97,7 +97,7 @@
 #define MEDIA_BUS_FMT_YUV10_1X30		0x2016
 #define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
 
-/* Bayer - next is	0x301d */
+/* Bayer - next is	0x3021 */
 #define MEDIA_BUS_FMT_SBGGR8_1X8		0x3001
 #define MEDIA_BUS_FMT_SGBRG8_1X8		0x3013
 #define MEDIA_BUS_FMT_SGRBG8_1X8		0x3002
@@ -126,6 +126,10 @@
 #define MEDIA_BUS_FMT_SGBRG14_1X14		0x301a
 #define MEDIA_BUS_FMT_SGRBG14_1X14		0x301b
 #define MEDIA_BUS_FMT_SRGGB14_1X14		0x301c
+#define MEDIA_BUS_FMT_SBGGR16_1X16		0x301d
+#define MEDIA_BUS_FMT_SGBRG16_1X16		0x301e
+#define MEDIA_BUS_FMT_SGRBG16_1X16		0x301f
+#define MEDIA_BUS_FMT_SRGGB16_1X16		0x3020
 
 /* JPEG compressed formats - next is	0x4002 */
 #define MEDIA_BUS_FMT_JPEG_1X8			0x4001
-- 
cgit v1.2.3


From bc3103f1ed405de587fa43d8b0671e615505a700 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amir@vadai.me>
Date: Thu, 8 Sep 2016 16:23:47 +0300
Subject: net/sched: cls_flower: Classify packet in ip tunnels

Introduce classifying by metadata extracted by the tunnel device.
Outer header fields - source/dest ip and tunnel id, are extracted from
the metadata when classifying.

For example, the following will add a filter on the ingress Qdisc of shared
vxlan device named 'vxlan0'. To forward packets with outer src ip
11.11.0.2, dst ip 11.11.0.1 and tunnel id 11. The packets will be
forwarded to tap device 'vnet0' (after metadata is released):

$ tc filter add dev vxlan0 protocol ip parent ffff: \
    flower \
      enc_src_ip 11.11.0.2 \
      enc_dst_ip 11.11.0.1 \
      enc_key_id 11 \
      dst_ip 11.11.11.1 \
    action tunnel_key release \
    action mirred egress redirect dev vnet0

The action tunnel_key, will be introduced in the next patch in this
series.

Signed-off-by: Amir Vadai <amir@vadai.me>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  11 +++++
 net/sched/cls_flower.c       | 100 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 110 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 51b5b247fb5a..f9c287c67eae 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -431,6 +431,17 @@ enum {
 	TCA_FLOWER_KEY_VLAN_ID,
 	TCA_FLOWER_KEY_VLAN_PRIO,
 	TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+
+	TCA_FLOWER_KEY_ENC_KEY_ID,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index cf9ad5b50889..b084b2aab2d7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -23,9 +23,13 @@
 #include <net/ip.h>
 #include <net/flow_dissector.h>
 
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
 struct fl_flow_key {
 	int	indev_ifindex;
 	struct flow_dissector_key_control control;
+	struct flow_dissector_key_control enc_control;
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
 	struct flow_dissector_key_vlan vlan;
@@ -35,6 +39,11 @@ struct fl_flow_key {
 		struct flow_dissector_key_ipv6_addrs ipv6;
 	};
 	struct flow_dissector_key_ports tp;
+	struct flow_dissector_key_keyid enc_key_id;
+	union {
+		struct flow_dissector_key_ipv4_addrs enc_ipv4;
+		struct flow_dissector_key_ipv6_addrs enc_ipv6;
+	};
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -124,11 +133,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_fl_filter *f;
 	struct fl_flow_key skb_key;
 	struct fl_flow_key skb_mkey;
+	struct ip_tunnel_info *info;
 
 	if (!atomic_read(&head->ht.nelems))
 		return -1;
 
 	fl_clear_masked_range(&skb_key, &head->mask);
+
+	info = skb_tunnel_info(skb);
+	if (info) {
+		struct ip_tunnel_key *key = &info->key;
+
+		switch (ip_tunnel_info_af(info)) {
+		case AF_INET:
+			skb_key.enc_ipv4.src = key->u.ipv4.src;
+			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
+			break;
+		case AF_INET6:
+			skb_key.enc_ipv6.src = key->u.ipv6.src;
+			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
+			break;
+		}
+
+		skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+	}
+
 	skb_key.indev_ifindex = skb->skb_iif;
 	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
 	 * so do it rather here.
@@ -297,7 +326,15 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
-
+	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -409,6 +446,40 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 			       sizeof(key->tp.dst));
 	}
 
+	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
+	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
+		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		fl_set_key_val(tb, &key->enc_ipv4.src,
+			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
+			       &mask->enc_ipv4.src,
+			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+			       sizeof(key->enc_ipv4.src));
+		fl_set_key_val(tb, &key->enc_ipv4.dst,
+			       TCA_FLOWER_KEY_ENC_IPV4_DST,
+			       &mask->enc_ipv4.dst,
+			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+			       sizeof(key->enc_ipv4.dst));
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
+	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
+		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		fl_set_key_val(tb, &key->enc_ipv6.src,
+			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
+			       &mask->enc_ipv6.src,
+			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+			       sizeof(key->enc_ipv6.src));
+		fl_set_key_val(tb, &key->enc_ipv6.dst,
+			       TCA_FLOWER_KEY_ENC_IPV6_DST,
+			       &mask->enc_ipv6.dst,
+			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+			       sizeof(key->enc_ipv6.dst));
+	}
+
+	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+		       &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+		       sizeof(key->enc_key_id.keyid));
+
 	return 0;
 }
 
@@ -821,6 +892,33 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 				  sizeof(key->tp.dst))))
 		goto nla_put_failure;
 
+	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
+			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
+			    TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+			    sizeof(key->enc_ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->enc_ipv4.dst,
+			     TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
+			     TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+			     sizeof(key->enc_ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+		 (fl_dump_key_val(skb, &key->enc_ipv6.src,
+			    TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
+			    TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+			    sizeof(key->enc_ipv6.src)) ||
+		 fl_dump_key_val(skb, &key->enc_ipv6.dst,
+				 TCA_FLOWER_KEY_ENC_IPV6_DST,
+				 &mask->enc_ipv6.dst,
+				 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+			    sizeof(key->enc_ipv6.dst))))
+		goto nla_put_failure;
+
+	if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+			    &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+			    sizeof(key->enc_key_id)))
+		goto nla_put_failure;
+
 	nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
 
 	if (tcf_exts_dump(skb, &f->exts))
-- 
cgit v1.2.3


From d0f6dd8a914f42c6f1a3a8c08caa16559d3d9a1b Mon Sep 17 00:00:00 2001
From: Amir Vadai <amir@vadai.me>
Date: Thu, 8 Sep 2016 16:23:48 +0300
Subject: net/sched: Introduce act_tunnel_key

This action could be used before redirecting packets to a shared tunnel
device, or when redirecting packets arriving from a such a device.

The action will release the metadata created by the tunnel device
(decap), or set the metadata with the specified values for encap
operation.

For example, the following flower filter will forward all ICMP packets
destined to 11.11.11.2 through the shared vxlan device 'vxlan0'. Before
redirecting, a metadata for the vxlan tunnel is created using the
tunnel_key action and it's arguments:

$ tc filter add dev net0 protocol ip parent ffff: \
    flower \
      ip_proto 1 \
      dst_ip 11.11.11.2 \
    action tunnel_key set \
      src_ip 11.11.0.1 \
      dst_ip 11.11.0.2 \
      id 11 \
    action mirred egress redirect dev vxlan0

Signed-off-by: Amir Vadai <amir@vadai.me>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_tunnel_key.h        |  30 +++
 include/uapi/linux/tc_act/tc_tunnel_key.h |  41 ++++
 net/sched/Kconfig                         |  11 +
 net/sched/Makefile                        |   1 +
 net/sched/act_tunnel_key.c                | 351 ++++++++++++++++++++++++++++++
 5 files changed, 434 insertions(+)
 create mode 100644 include/net/tc_act/tc_tunnel_key.h
 create mode 100644 include/uapi/linux/tc_act/tc_tunnel_key.h
 create mode 100644 net/sched/act_tunnel_key.c

(limited to 'include/uapi')

diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
new file mode 100644
index 000000000000..253f8da6c2a6
--- /dev/null
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NET_TC_TUNNEL_KEY_H
+#define __NET_TC_TUNNEL_KEY_H
+
+#include <net/act_api.h>
+
+struct tcf_tunnel_key_params {
+	struct rcu_head		rcu;
+	int			tcft_action;
+	int			action;
+	struct metadata_dst     *tcft_enc_metadata;
+};
+
+struct tcf_tunnel_key {
+	struct tc_action	      common;
+	struct tcf_tunnel_key_params __rcu *params;
+};
+
+#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a)
+
+#endif /* __NET_TC_TUNNEL_KEY_H */
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
new file mode 100644
index 000000000000..890106ff16e6
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_TUNNEL_KEY_H
+#define __LINUX_TC_TUNNEL_KEY_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_TUNNEL_KEY 17
+
+#define TCA_TUNNEL_KEY_ACT_SET	    1
+#define TCA_TUNNEL_KEY_ACT_RELEASE  2
+
+struct tc_tunnel_key {
+	tc_gen;
+	int t_action;
+};
+
+enum {
+	TCA_TUNNEL_KEY_UNSPEC,
+	TCA_TUNNEL_KEY_TM,
+	TCA_TUNNEL_KEY_PARMS,
+	TCA_TUNNEL_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_TUNNEL_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_TUNNEL_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_TUNNEL_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_TUNNEL_KEY_ENC_KEY_ID,	/* be64 */
+	TCA_TUNNEL_KEY_PAD,
+	__TCA_TUNNEL_KEY_MAX,
+};
+
+#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index ccf931b3b94c..72e3426fa48f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -761,6 +761,17 @@ config NET_ACT_IFE
 	  To compile this code as a module, choose M here: the
 	  module will be called act_ife.
 
+config NET_ACT_TUNNEL_KEY
+        tristate "IP tunnel metadata manipulation"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to set/release ip tunnel metadata.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_tunnel_key.
+
 config NET_IFE_SKBMARK
         tristate "Support to encoding decoding skb mark on IFE action"
         depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ae088a5a9d95..b9d046b9535a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
+obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
new file mode 100644
index 000000000000..dceff7412dc3
--- /dev/null
+++ b/net/sched/act_tunnel_key.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_tunnel_key.h>
+
+#define TUNNEL_KEY_TAB_MASK     15
+
+static int tunnel_key_net_id;
+static struct tc_action_ops act_tunnel_key_ops;
+
+static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+	int action;
+
+	rcu_read_lock();
+
+	params = rcu_dereference(t->params);
+
+	tcf_lastuse_update(&t->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+	action = params->action;
+
+	switch (params->tcft_action) {
+	case TCA_TUNNEL_KEY_ACT_RELEASE:
+		skb_dst_drop(skb);
+		break;
+	case TCA_TUNNEL_KEY_ACT_SET:
+		skb_dst_drop(skb);
+		skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
+		break;
+	default:
+		WARN_ONCE(1, "Bad tunnel_key action %d.\n",
+			  params->tcft_action);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return action;
+}
+
+static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
+	[TCA_TUNNEL_KEY_PARMS]	    = { .len = sizeof(struct tc_tunnel_key) },
+	[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
+	[TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
+	[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+	[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+	[TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
+};
+
+static int tunnel_key_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
+	struct tcf_tunnel_key_params *params_old;
+	struct tcf_tunnel_key_params *params_new;
+	struct metadata_dst *metadata = NULL;
+	struct tc_tunnel_key *parm;
+	struct tcf_tunnel_key *t;
+	bool exists = false;
+	__be64 key_id;
+	int ret = 0;
+	int err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_TUNNEL_KEY_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	switch (parm->t_action) {
+	case TCA_TUNNEL_KEY_ACT_RELEASE:
+		break;
+	case TCA_TUNNEL_KEY_ACT_SET:
+		if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+
+		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
+		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
+			__be32 saddr;
+			__be32 daddr;
+
+			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
+			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
+
+			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+						    TUNNEL_KEY, key_id, 0);
+		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
+			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
+			struct in6_addr saddr;
+			struct in6_addr daddr;
+
+			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
+			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
+
+			metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
+						      TUNNEL_KEY, key_id, 0);
+		}
+
+		if (!metadata) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
+		break;
+	default:
+		goto err_out;
+	}
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_tunnel_key_ops, bind, true);
+		if (ret)
+			return ret;
+
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	t = to_tunnel_key(*a);
+
+	ASSERT_RTNL();
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	params_old = rtnl_dereference(t->params);
+
+	params_new->action = parm->action;
+	params_new->tcft_action = parm->t_action;
+	params_new->tcft_enc_metadata = metadata;
+
+	rcu_assign_pointer(t->params, params_new);
+
+	if (params_old)
+		kfree_rcu(params_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+
+	return ret;
+
+err_out:
+	if (exists)
+		tcf_hash_release(*a, bind);
+	return ret;
+}
+
+static void tunnel_key_release(struct tc_action *a, int bind)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+
+	rcu_read_lock();
+	params = rcu_dereference(t->params);
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+		dst_release(&params->tcft_enc_metadata->dst);
+
+	kfree_rcu(params, rcu);
+
+	rcu_read_unlock();
+}
+
+static int tunnel_key_dump_addresses(struct sk_buff *skb,
+				     const struct ip_tunnel_info *info)
+{
+	unsigned short family = ip_tunnel_info_af(info);
+
+	if (family == AF_INET) {
+		__be32 saddr = info->key.u.ipv4.src;
+		__be32 daddr = info->key.u.ipv4.dst;
+
+		if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) &&
+		    !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr))
+			return 0;
+	}
+
+	if (family == AF_INET6) {
+		const struct in6_addr *saddr6 = &info->key.u.ipv6.src;
+		const struct in6_addr *daddr6 = &info->key.u.ipv6.dst;
+
+		if (!nla_put_in6_addr(skb,
+				      TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) &&
+		    !nla_put_in6_addr(skb,
+				      TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6))
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+	struct tc_tunnel_key opt = {
+		.index    = t->tcf_index,
+		.refcnt   = t->tcf_refcnt - ref,
+		.bindcnt  = t->tcf_bindcnt - bind,
+	};
+	struct tcf_t tm;
+	int ret = -1;
+
+	rcu_read_lock();
+	params = rcu_dereference(t->params);
+
+	opt.t_action = params->tcft_action;
+	opt.action = params->action;
+
+	if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+		struct ip_tunnel_key *key =
+			&params->tcft_enc_metadata->u.tun_info.key;
+		__be32 key_id = tunnel_id_to_key32(key->tun_id);
+
+		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+		    tunnel_key_dump_addresses(skb,
+					      &params->tcft_enc_metadata->u.tun_info))
+			goto nla_put_failure;
+	}
+
+	tcf_tm_dump(&tm, &t->tcf_tm);
+	if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
+			  &tm, TCA_TUNNEL_KEY_PAD))
+		goto nla_put_failure;
+
+	ret = skb->len;
+	goto out;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+out:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_tunnel_key_ops = {
+	.kind		=	"tunnel_key",
+	.type		=	TCA_ACT_TUNNEL_KEY,
+	.owner		=	THIS_MODULE,
+	.act		=	tunnel_key_act,
+	.dump		=	tunnel_key_dump,
+	.init		=	tunnel_key_init,
+	.cleanup	=	tunnel_key_release,
+	.walk		=	tunnel_key_walker,
+	.lookup		=	tunnel_key_search,
+	.size		=	sizeof(struct tcf_tunnel_key),
+};
+
+static __net_init int tunnel_key_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+}
+
+static void __net_exit tunnel_key_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations tunnel_key_net_ops = {
+	.init = tunnel_key_init_net,
+	.exit = tunnel_key_exit_net,
+	.id   = &tunnel_key_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init tunnel_key_init_module(void)
+{
+	return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+static void __exit tunnel_key_cleanup_module(void)
+{
+	tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+module_init(tunnel_key_init_module);
+module_exit(tunnel_key_cleanup_module);
+
+MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
+MODULE_DESCRIPTION("ip tunnel manipulation actions");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 70ca767ea1b2748f45e96192400e515dddbe517c Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Tue, 6 Sep 2016 08:44:19 +0200
Subject: netfilter: nft_hash: Add hash offset value

Add support to pass through an offset to the hash value. With this
feature, the sysadmin is able to generate a hash with a given
offset value.

Example:

	meta mark set jhash ip saddr mod 2 seed 0xabcd offset 100

This option generates marks according to the source address from 100 to
101.

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_hash.c                 | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 24161e25576d..8c653bbd1ead 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -731,6 +731,7 @@ enum nft_meta_keys {
  * @NFTA_HASH_LEN: source data length (NLA_U32)
  * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
  * @NFTA_HASH_SEED: seed value (NLA_U32)
+ * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
  */
 enum nft_hash_attributes {
 	NFTA_HASH_UNSPEC,
@@ -739,6 +740,7 @@ enum nft_hash_attributes {
 	NFTA_HASH_LEN,
 	NFTA_HASH_MODULUS,
 	NFTA_HASH_SEED,
+	NFTA_HASH_OFFSET,
 	__NFTA_HASH_MAX,
 };
 #define NFTA_HASH_MAX	(__NFTA_HASH_MAX - 1)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 764251d31e46..bd12f7a801c2 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -23,6 +23,7 @@ struct nft_hash {
 	u8			len;
 	u32			modulus;
 	u32			seed;
+	u32			offset;
 };
 
 static void nft_hash_eval(const struct nft_expr *expr,
@@ -31,10 +32,10 @@ static void nft_hash_eval(const struct nft_expr *expr,
 {
 	struct nft_hash *priv = nft_expr_priv(expr);
 	const void *data = &regs->data[priv->sreg];
+	u32 h;
 
-	regs->data[priv->dreg] =
-		reciprocal_scale(jhash(data, priv->len, priv->seed),
-				 priv->modulus);
+	h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus);
+	regs->data[priv->dreg] = h + priv->offset;
 }
 
 static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
@@ -59,6 +60,9 @@ static int nft_hash_init(const struct nft_ctx *ctx,
 	    !tb[NFTA_HASH_MODULUS])
 		return -EINVAL;
 
+	if (tb[NFTA_HASH_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
 	priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
 	priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
 
@@ -72,6 +76,9 @@ static int nft_hash_init(const struct nft_ctx *ctx,
 	if (priv->modulus <= 1)
 		return -ERANGE;
 
+	if (priv->offset + priv->modulus - 1 < U32_MAX)
+		return -EOVERFLOW;
+
 	priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
 
 	return nft_validate_register_load(priv->sreg, len) &&
@@ -94,7 +101,9 @@ static int nft_hash_dump(struct sk_buff *skb,
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
 		goto nla_put_failure;
-
+	if (priv->offset != 0)
+		if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From dbd2be0646e3239022630c426cbceefa15714bca Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 7 Sep 2016 12:22:18 +0200
Subject: netfilter: nft_dynset: allow to invert match criteria

The dynset expression matches if we can fit a new entry into the set.
If there is no room for it, then it breaks the rule evaluation.

This patch introduces the inversion flag so you can add rules to
explicitly drop packets that don't fit into the set. For example:

 # nft filter input flow table xyz size 4 { ip saddr timeout 120s counter } overflow drop

This is useful to provide a replacement for connlimit.

For the rule above, every new entry uses the IPv4 address as key in the
set, this entry gets a timeout of 120 seconds that gets refresh on every
packet seen. If we get new flow and our set already contains 4 entries
already, then this packet is dropped.

You can already express this in positive logic, assuming default policy
to drop:

 # nft filter input flow table xyz size 4 { ip saddr timeout 10s counter } accept

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  6 ++++++
 net/netfilter/nft_dynset.c               | 20 +++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 8c653bbd1ead..bc0eb6a1066d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -575,6 +575,10 @@ enum nft_dynset_ops {
 	NFT_DYNSET_OP_UPDATE,
 };
 
+enum nft_dynset_flags {
+	NFT_DYNSET_F_INV	= (1 << 0),
+};
+
 /**
  * enum nft_dynset_attributes - dynset expression attributes
  *
@@ -585,6 +589,7 @@ enum nft_dynset_ops {
  * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
  * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
  * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_DYNSET_FLAGS: flags (NLA_U32)
  */
 enum nft_dynset_attributes {
 	NFTA_DYNSET_UNSPEC,
@@ -596,6 +601,7 @@ enum nft_dynset_attributes {
 	NFTA_DYNSET_TIMEOUT,
 	NFTA_DYNSET_EXPR,
 	NFTA_DYNSET_PAD,
+	NFTA_DYNSET_FLAGS,
 	__NFTA_DYNSET_MAX,
 };
 #define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 0af26699bf04..e3b83c31da2e 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -22,6 +22,7 @@ struct nft_dynset {
 	enum nft_dynset_ops		op:8;
 	enum nft_registers		sreg_key:8;
 	enum nft_registers		sreg_data:8;
+	bool				invert;
 	u64				timeout;
 	struct nft_expr			*expr;
 	struct nft_set_binding		binding;
@@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr,
 
 		if (sexpr != NULL)
 			sexpr->ops->eval(sexpr, regs, pkt);
+
+		if (priv->invert)
+			regs->verdict.code = NFT_BREAK;
 		return;
 	}
 out:
-	regs->verdict.code = NFT_BREAK;
+	if (!priv->invert)
+		regs->verdict.code = NFT_BREAK;
 }
 
 static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
@@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
 	[NFTA_DYNSET_SREG_DATA]	= { .type = NLA_U32 },
 	[NFTA_DYNSET_TIMEOUT]	= { .type = NLA_U64 },
 	[NFTA_DYNSET_EXPR]	= { .type = NLA_NESTED },
+	[NFTA_DYNSET_FLAGS]	= { .type = NLA_U32 },
 };
 
 static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
 		return -EINVAL;
 
+	if (tb[NFTA_DYNSET_FLAGS]) {
+		u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+
+		if (flags & ~NFT_DYNSET_F_INV)
+			return -EINVAL;
+		if (flags & NFT_DYNSET_F_INV)
+			priv->invert = true;
+	}
+
 	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
 				   genmask);
 	if (IS_ERR(set)) {
@@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx,
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
+	u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
 
 	if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
 		goto nla_put_failure;
@@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 		goto nla_put_failure;
 	if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
 		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From 8e8118f893138d4cc3d4dbf4163d7497fca54a9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 11 Sep 2016 22:55:53 +0200
Subject: netfilter: conntrack: remove packet hotpath stats

These counters sit in hot path and do show up in perf, this is especially
true for 'found' and 'searched' which get incremented for every packet
processed.

Information like

searched=212030105
new=623431
found=333613
delete=623327

does not seem too helpful nowadays:

- on busy systems found and searched will overflow every few hours
(these are 32bit integers), other more busy ones every few days.

- for debugging there are better methods, such as iptables' trace target,
the conntrack log sysctls.  Nowadays we also have perf tool.

This removes packet path stat counters except those that
are expected to be 0 (or close to 0) on a normal system, e.g.
'insert_failed' (race happened) or 'invalid' (proto tracker rejects).

The insert stat is retained for the ctnetlink case.
The found stat is retained for the tuple-is-taken check when NAT has to
determine if it needs to pick a different source address.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h      |  4 ----
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |  8 ++++----
 net/netfilter/nf_conntrack_core.c                  | 14 ++------------
 net/netfilter/nf_conntrack_netlink.c               |  6 +-----
 net/netfilter/nf_conntrack_standalone.c            |  8 ++++----
 5 files changed, 11 insertions(+), 29 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 275505792664..1d1ef4e20512 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -4,13 +4,9 @@
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
 struct ip_conntrack_stat {
-	unsigned int searched;
 	unsigned int found;
-	unsigned int new;
 	unsigned int invalid;
 	unsigned int ignore;
-	unsigned int delete;
-	unsigned int delete_list;
 	unsigned int insert;
 	unsigned int insert_failed;
 	unsigned int drop;
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 9df789709abe..6deb8867c5fc 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -231,13 +231,13 @@ enum ctattr_secctx {
 
 enum ctattr_stats_cpu {
 	CTA_STATS_UNSPEC,
-	CTA_STATS_SEARCHED,
+	CTA_STATS_SEARCHED,	/* no longer used */
 	CTA_STATS_FOUND,
-	CTA_STATS_NEW,
+	CTA_STATS_NEW,		/* no longer used */
 	CTA_STATS_INVALID,
 	CTA_STATS_IGNORE,
-	CTA_STATS_DELETE,
-	CTA_STATS_DELETE_LIST,
+	CTA_STATS_DELETE,	/* no longer used */
+	CTA_STATS_DELETE_LIST,	/* no longer used */
 	CTA_STATS_INSERT,
 	CTA_STATS_INSERT_FAILED,
 	CTA_STATS_DROP,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ac1db4019d5c..8d1ddb9b63ed 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -379,7 +379,6 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
@@ -406,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-	NF_CT_STAT_INC(net, delete);
 	local_bh_enable();
 
 	if (ct->master)
@@ -438,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
 	nf_ct_add_to_dying_list(ct);
 
-	NF_CT_STAT_INC(net, delete_list);
 	local_bh_enable();
 }
 
@@ -529,11 +526,8 @@ begin:
 		if (nf_ct_is_dying(ct))
 			continue;
 
-		if (nf_ct_key_equal(h, tuple, zone, net)) {
-			NF_CT_STAT_INC_ATOMIC(net, found);
+		if (nf_ct_key_equal(h, tuple, zone, net))
 			return h;
-		}
-		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
 	/*
 	 * if the nulls value we got at the end of this lookup is
@@ -798,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	 */
 	__nf_conntrack_hash_insert(ct, hash, reply_hash);
 	nf_conntrack_double_unlock(hash, reply_hash);
-	NF_CT_STAT_INC(net, insert);
 	local_bh_enable();
 
 	help = nfct_help(ct);
@@ -857,7 +850,6 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 			rcu_read_unlock();
 			return 1;
 		}
-		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
 
 	if (get_nulls_value(n) != hash) {
@@ -1177,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		}
 		spin_unlock(&nf_conntrack_expect_lock);
 	}
-	if (!exp) {
+	if (!exp)
 		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
-		NF_CT_STAT_INC(net, new);
-	}
 
 	/* Now it is inserted into the unconfirmed list, bump refcount */
 	nf_conntrack_get(&ct->ct_general);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c052b712c49f..27540455dc62 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1984,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 	nfmsg->version      = NFNETLINK_V0;
 	nfmsg->res_id	    = htons(cpu);
 
-	if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) ||
-	    nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
-	    nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) ||
+	if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
 	    nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
 	    nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
-	    nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) ||
-	    nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
 				htonl(st->insert_failed)) ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3d9a316a3c77..7d52f8401afd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -352,13 +352,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
 			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
-		   st->searched,
+		   0,
 		   st->found,
-		   st->new,
+		   0,
 		   st->invalid,
 		   st->ignore,
-		   st->delete,
-		   st->delete_list,
+		   0,
+		   0,
 		   st->insert,
 		   st->insert_failed,
 		   st->drop,
-- 
cgit v1.2.3


From 398fa4db6c694e1f655cad78478a9e3fb030cc01 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 15 Sep 2016 07:25:19 +0900
Subject: ALSA: control: move layout of TLV payload to UAPI header

In ALSA control interface, each element set can have threshold level
information. This information is transferred between drivers/applications,
in a shape of tlv packet. The layout of this packet is defined in
'uapi/sound/asound.h' (struct snd_ctl_tlv):

struct snd_ctl_tlv {
    unsigned int numid;
    unsigned int length;
    unsigned int tlv[0];
};

Data in the payload (struct snd_ctl_tlv.tlv) is expected to be filled
according to our own protocol. This protocol is described in
'include/sound/tlv.h'. A layout of the payload is expected as:

struct snd_ctl_tlv.tlv[0]: one of SNDRV_CTL_TLVT_XXX
struct snd_ctl_tlv.tlv[1]: Length of data
struct snd_ctl_tlv.tlv[2...]: data

Unfortunately, the macro is not exported to user land yet, thus
applications cannot get to know the protocol.

Additionally, ALSA control core has a feature called as 'user-defined'
element set. This allows applications to add/remove arbitrary element sets
with elements to control devices. Elements in the element set can be
operated by the same way as the ones added by in-kernel implementation.

For threshold level information of 'user-defined' element set, applications
need to register the information to an element set. However, as described
above, layout of the payload is closed in kernel land. This is quite
inconvenient, too.

This commit moves the protocol to UAPI header for TLV.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tlv.h      | 61 ------------------------------------------------
 include/uapi/sound/tlv.h | 60 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 61 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index df97d1966468..1f593b6747e9 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -22,67 +22,6 @@
  *
  */
 
-/*
- * TLV structure is right behind the struct snd_ctl_tlv:
- *   unsigned int type  	- see SNDRV_CTL_TLVT_*
- *   unsigned int length
- *   .... data aligned to sizeof(unsigned int), use
- *        block_length = (length + (sizeof(unsigned int) - 1)) &
- *                       ~(sizeof(unsigned int) - 1)) ....
- */
-
 #include <uapi/sound/tlv.h>
 
-#define TLV_ITEM(type, ...) \
-	(type), TLV_LENGTH(__VA_ARGS__), __VA_ARGS__
-#define TLV_LENGTH(...) \
-	((unsigned int)sizeof((const unsigned int[]) { __VA_ARGS__ }))
-
-#define TLV_CONTAINER_ITEM(...) \
-	TLV_ITEM(SNDRV_CTL_TLVT_CONTAINER, __VA_ARGS__)
-#define DECLARE_TLV_CONTAINER(name, ...) \
-	unsigned int name[] = { TLV_CONTAINER_ITEM(__VA_ARGS__) }
-
-#define TLV_DB_SCALE_MASK	0xffff
-#define TLV_DB_SCALE_MUTE	0x10000
-#define TLV_DB_SCALE_ITEM(min, step, mute)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_SCALE,			\
-		 (min),					\
-		 ((step) & TLV_DB_SCALE_MASK) |		\
-			((mute) ? TLV_DB_SCALE_MUTE : 0))
-#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
-	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
-
-/* dB scale specified with min/max values instead of step */
-#define TLV_DB_MINMAX_ITEM(min_dB, max_dB)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX, (min_dB), (max_dB))
-#define TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX_MUTE, (min_dB), (max_dB))
-#define DECLARE_TLV_DB_MINMAX(name, min_dB, max_dB) \
-	unsigned int name[] = { TLV_DB_MINMAX_ITEM(min_dB, max_dB) }
-#define DECLARE_TLV_DB_MINMAX_MUTE(name, min_dB, max_dB) \
-	unsigned int name[] = { TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) }
-
-/* linear volume between min_dB and max_dB (.01dB unit) */
-#define TLV_DB_LINEAR_ITEM(min_dB, max_dB)		    \
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_LINEAR, (min_dB), (max_dB))
-#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
-	unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) }
-
-/* dB range container:
- * Items in dB range container must be ordered by their values and by their
- * dB values. This implies that larger values must correspond with larger
- * dB values (which is also required for all other mixer controls).
- */
-/* Each item is: <min> <max> <TLV> */
-#define TLV_DB_RANGE_ITEM(...) \
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_RANGE, __VA_ARGS__)
-#define DECLARE_TLV_DB_RANGE(name, ...) \
-	unsigned int name[] = { TLV_DB_RANGE_ITEM(__VA_ARGS__) }
-/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
-#define TLV_DB_RANGE_HEAD(num)			\
-	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
-
-#define TLV_DB_GAIN_MUTE	-9999999
-
 #endif /* __SOUND_TLV_H */
diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h
index ffc4f203146c..b741e0ef3643 100644
--- a/include/uapi/sound/tlv.h
+++ b/include/uapi/sound/tlv.h
@@ -28,4 +28,64 @@
 #define SNDRV_CTL_TLVT_CHMAP_VAR	0x102	/* channels freely swappable */
 #define SNDRV_CTL_TLVT_CHMAP_PAIRED	0x103	/* pair-wise swappable */
 
+/*
+ * TLV structure is right behind the struct snd_ctl_tlv:
+ *   unsigned int type  	- see SNDRV_CTL_TLVT_*
+ *   unsigned int length
+ *   .... data aligned to sizeof(unsigned int), use
+ *        block_length = (length + (sizeof(unsigned int) - 1)) &
+ *                       ~(sizeof(unsigned int) - 1)) ....
+ */
+#define TLV_ITEM(type, ...) \
+	(type), TLV_LENGTH(__VA_ARGS__), __VA_ARGS__
+#define TLV_LENGTH(...) \
+	((unsigned int)sizeof((const unsigned int[]) { __VA_ARGS__ }))
+
+#define TLV_CONTAINER_ITEM(...) \
+	TLV_ITEM(SNDRV_CTL_TLVT_CONTAINER, __VA_ARGS__)
+#define DECLARE_TLV_CONTAINER(name, ...) \
+	unsigned int name[] = { TLV_CONTAINER_ITEM(__VA_ARGS__) }
+
+#define TLV_DB_SCALE_MASK	0xffff
+#define TLV_DB_SCALE_MUTE	0x10000
+#define TLV_DB_SCALE_ITEM(min, step, mute)			\
+	TLV_ITEM(SNDRV_CTL_TLVT_DB_SCALE,			\
+		 (min),					\
+		 ((step) & TLV_DB_SCALE_MASK) |		\
+			((mute) ? TLV_DB_SCALE_MUTE : 0))
+#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
+	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
+
+/* dB scale specified with min/max values instead of step */
+#define TLV_DB_MINMAX_ITEM(min_dB, max_dB)			\
+	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX, (min_dB), (max_dB))
+#define TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB)			\
+	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX_MUTE, (min_dB), (max_dB))
+#define DECLARE_TLV_DB_MINMAX(name, min_dB, max_dB) \
+	unsigned int name[] = { TLV_DB_MINMAX_ITEM(min_dB, max_dB) }
+#define DECLARE_TLV_DB_MINMAX_MUTE(name, min_dB, max_dB) \
+	unsigned int name[] = { TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) }
+
+/* linear volume between min_dB and max_dB (.01dB unit) */
+#define TLV_DB_LINEAR_ITEM(min_dB, max_dB)		    \
+	TLV_ITEM(SNDRV_CTL_TLVT_DB_LINEAR, (min_dB), (max_dB))
+#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
+	unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) }
+
+/* dB range container:
+ * Items in dB range container must be ordered by their values and by their
+ * dB values. This implies that larger values must correspond with larger
+ * dB values (which is also required for all other mixer controls).
+ */
+/* Each item is: <min> <max> <TLV> */
+#define TLV_DB_RANGE_ITEM(...) \
+	TLV_ITEM(SNDRV_CTL_TLVT_DB_RANGE, __VA_ARGS__)
+#define DECLARE_TLV_DB_RANGE(name, ...) \
+	unsigned int name[] = { TLV_DB_RANGE_ITEM(__VA_ARGS__) }
+/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
+#define TLV_DB_RANGE_HEAD(num)			\
+	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
+
+#define TLV_DB_GAIN_MUTE	-9999999
+
 #endif
-- 
cgit v1.2.3


From 46e860f76804f86ac6062e9bb0d6dbea47f23899 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 15 Sep 2016 07:25:20 +0900
Subject: ALSA: rename TLV-related macros so that they're friendly to user
 applications

In a previous commit, some macros newly appeared to UAPI header for TLV
packet. These macros have short names and they easily bring name conflist
to applications. The conflict can be avoided to rename them with a proper
prefix.

For this purpose, this commit renames these macros with prefix
'SNDRV_CTL_TLVD_'.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tlv.h      | 26 ++++++++++++++++
 include/uapi/sound/tlv.h | 80 ++++++++++++++++++++++++++++--------------------
 2 files changed, 72 insertions(+), 34 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index 1f593b6747e9..6e2e7735d20a 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -24,4 +24,30 @@
 
 #include <uapi/sound/tlv.h>
 
+/* For historical reasons, these macros are aliases to the ones in UAPI. */
+#define TLV_ITEM			SNDRV_CTL_TLVD_ITEM
+#define TLV_LENGTH			SNDRV_CTL_TLVD_LENGTH
+
+#define TLV_CONTAINER_ITEM		SNDRV_CTL_TLVD_CONTAINER_ITEM
+#define DECLARE_TLV_CONTAINER		SNDRV_CTL_TLVD_DECLARE_CONTAINER
+
+#define TLV_DB_SCALE_MASK		SNDRV_CTL_TLVD_DB_SCALE_MASK
+#define TLV_DB_SCALE_MUTE		SNDRV_CTL_TLVD_DB_SCALE_MUTE
+#define TLV_DB_SCALE_ITEM		SNDRV_CTL_TLVD_DB_SCALE_ITEM
+#define DECLARE_TLV_DB_SCALE		SNDRV_CTL_TLVD_DECLARE_DB_SCALE
+
+#define TLV_DB_MINMAX_ITEM		SNDRV_CTL_TLVD_DB_MINMAX_ITEM
+#define TLV_DB_MINMAX_MUTE_ITEM		SNDRV_CTL_TLVD_DB_MINMAX_MUTE_ITEM
+#define DECLARE_TLV_DB_MINMAX		SNDRV_CTL_TLVD_DECLARE_DB_MINMAX
+#define DECLARE_TLV_DB_MINMAX_MUTE	SNDRV_CTL_TLVD_DECLARE_DB_MINMAX_MUTE
+
+#define TLV_DB_LINEAR_ITEM		SNDRV_CTL_TLVD_DB_LINEAR_ITEM
+#define DECLARE_TLV_DB_LINEAR		SNDRV_CTL_TLVD_DECLARE_DB_LINEAR
+
+#define TLV_DB_RANGE_ITEM		SNDRV_CTL_TLVD_DB_RANGE_ITEM
+#define DECLARE_TLV_DB_RANGE		SNDRV_CTL_TLVD_DECLARE_DB_RANGE
+#define TLV_DB_RANGE_HEAD		SNDRV_CTL_TLVD_DB_RANGE_HEAD
+
+#define TLV_DB_GAIN_MUTE		SNDRV_CTL_TLVD_DB_GAIN_MUTE
+
 #endif /* __SOUND_TLV_H */
diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h
index b741e0ef3643..f3c198f8bee7 100644
--- a/include/uapi/sound/tlv.h
+++ b/include/uapi/sound/tlv.h
@@ -36,41 +36,51 @@
  *        block_length = (length + (sizeof(unsigned int) - 1)) &
  *                       ~(sizeof(unsigned int) - 1)) ....
  */
-#define TLV_ITEM(type, ...) \
-	(type), TLV_LENGTH(__VA_ARGS__), __VA_ARGS__
-#define TLV_LENGTH(...) \
+#define SNDRV_CTL_TLVD_ITEM(type, ...) \
+	(type), SNDRV_CTL_TLVD_LENGTH(__VA_ARGS__), __VA_ARGS__
+#define SNDRV_CTL_TLVD_LENGTH(...) \
 	((unsigned int)sizeof((const unsigned int[]) { __VA_ARGS__ }))
 
-#define TLV_CONTAINER_ITEM(...) \
-	TLV_ITEM(SNDRV_CTL_TLVT_CONTAINER, __VA_ARGS__)
-#define DECLARE_TLV_CONTAINER(name, ...) \
-	unsigned int name[] = { TLV_CONTAINER_ITEM(__VA_ARGS__) }
+#define SNDRV_CTL_TLVD_CONTAINER_ITEM(...) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_CONTAINER, __VA_ARGS__)
+#define SNDRV_CTL_TLVD_DECLARE_CONTAINER(name, ...) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_CONTAINER_ITEM(__VA_ARGS__) \
+	}
 
-#define TLV_DB_SCALE_MASK	0xffff
-#define TLV_DB_SCALE_MUTE	0x10000
-#define TLV_DB_SCALE_ITEM(min, step, mute)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_SCALE,			\
-		 (min),					\
-		 ((step) & TLV_DB_SCALE_MASK) |		\
-			((mute) ? TLV_DB_SCALE_MUTE : 0))
-#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
-	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
+#define SNDRV_CTL_TLVD_DB_SCALE_MASK	0xffff
+#define SNDRV_CTL_TLVD_DB_SCALE_MUTE	0x10000
+#define SNDRV_CTL_TLVD_DB_SCALE_ITEM(min, step, mute) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_SCALE, \
+			    (min), \
+			    ((step) & SNDRV_CTL_TLVD_DB_SCALE_MASK) | \
+			     ((mute) ? SNDRV_CTL_TLVD_DB_SCALE_MUTE : 0))
+#define SNDRV_CTL_TLVD_DECLARE_DB_SCALE(name, min, step, mute) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_DB_SCALE_ITEM(min, step, mute) \
+	}
 
 /* dB scale specified with min/max values instead of step */
-#define TLV_DB_MINMAX_ITEM(min_dB, max_dB)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX, (min_dB), (max_dB))
-#define TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB)			\
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_MINMAX_MUTE, (min_dB), (max_dB))
-#define DECLARE_TLV_DB_MINMAX(name, min_dB, max_dB) \
-	unsigned int name[] = { TLV_DB_MINMAX_ITEM(min_dB, max_dB) }
-#define DECLARE_TLV_DB_MINMAX_MUTE(name, min_dB, max_dB) \
-	unsigned int name[] = { TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) }
+#define SNDRV_CTL_TLVD_DB_MINMAX_ITEM(min_dB, max_dB) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_MINMAX, (min_dB), (max_dB))
+#define SNDRV_CTL_TLVD_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_MINMAX_MUTE, (min_dB), (max_dB))
+#define SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(name, min_dB, max_dB) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_DB_MINMAX_ITEM(min_dB, max_dB) \
+	}
+#define SNDRV_CTL_TLVD_DECLARE_DB_MINMAX_MUTE(name, min_dB, max_dB) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) \
+	}
 
 /* linear volume between min_dB and max_dB (.01dB unit) */
-#define TLV_DB_LINEAR_ITEM(min_dB, max_dB)		    \
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_LINEAR, (min_dB), (max_dB))
-#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
-	unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) }
+#define SNDRV_CTL_TLVD_DB_LINEAR_ITEM(min_dB, max_dB) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_LINEAR, (min_dB), (max_dB))
+#define SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(name, min_dB, max_dB) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_DB_LINEAR_ITEM(min_dB, max_dB) \
+	}
 
 /* dB range container:
  * Items in dB range container must be ordered by their values and by their
@@ -78,14 +88,16 @@
  * dB values (which is also required for all other mixer controls).
  */
 /* Each item is: <min> <max> <TLV> */
-#define TLV_DB_RANGE_ITEM(...) \
-	TLV_ITEM(SNDRV_CTL_TLVT_DB_RANGE, __VA_ARGS__)
-#define DECLARE_TLV_DB_RANGE(name, ...) \
-	unsigned int name[] = { TLV_DB_RANGE_ITEM(__VA_ARGS__) }
+#define SNDRV_CTL_TLVD_DB_RANGE_ITEM(...) \
+	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_RANGE, __VA_ARGS__)
+#define SNDRV_CTL_TLVD_DECLARE_DB_RANGE(name, ...) \
+	unsigned int name[] = { \
+		SNDRV_CTL_TLVD_DB_RANGE_ITEM(__VA_ARGS__) \
+	}
 /* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
-#define TLV_DB_RANGE_HEAD(num)			\
+#define SNDRV_CTL_TLVD_DB_RANGE_HEAD(num) \
 	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
 
-#define TLV_DB_GAIN_MUTE	-9999999
+#define SNDRV_CTL_TLVD_DB_GAIN_MUTE	-9999999
 
 #endif
-- 
cgit v1.2.3


From d9c181e22a0599fa7e27c3717f56bc1b3b020e63 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Sat, 23 Apr 2016 10:08:59 -0400
Subject: drm/msm: extend the submit ioctl to pass in flags

We'll want to be able to pass in flags, such as asking for explicit
fencing, and possibly other things down the road.  Fortunately we
don't need a full 32b for the pipe-id.  So use the upper 16 bits
for flags (which could be extended or reduced later if needed, so
start adding flags from the high bits).

Since anything with the upper bits set would not be a valid pipe-id,
an old userspace would not set any of the upper bits, and an old
kernel would reject it as an invalid pipe-id.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c |  5 ++++-
 include/uapi/drm/msm_drm.h           | 15 ++++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 9766f9ae4b7d..0b96d85d99f9 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -370,7 +370,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	/* for now, we just have 3d pipe.. eventually this would need to
 	 * be more clever to dispatch to appropriate gpu module:
 	 */
-	if (args->pipe != MSM_PIPE_3D0)
+	if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
+		return -EINVAL;
+
+	if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
 		return -EINVAL;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 49f778de8e06..913e08cd5ceb 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -42,6 +42,15 @@ extern "C" {
 #define MSM_PIPE_2D1         0x02
 #define MSM_PIPE_3D0         0x10
 
+/* The pipe-id just uses the lower bits, so can be OR'd with flags in
+ * the upper 16 bits (which could be extended further, if needed, maybe
+ * we extend/overload the pipe-id some day to deal with multiple rings,
+ * but even then I don't think we need the full lower 16 bits).
+ */
+#define MSM_PIPE_ID_MASK     0xffff
+#define MSM_PIPE_ID(x)       ((x) & MSM_PIPE_ID_MASK)
+#define MSM_PIPE_FLAGS(x)    ((x) & ~MSM_PIPE_ID_MASK)
+
 /* timeouts are specified in clock-monotonic absolute times (to simplify
  * restarting interrupted ioctls).  The following struct is logically the
  * same as 'struct timespec' but 32/64b ABI safe.
@@ -175,12 +184,16 @@ struct drm_msm_gem_submit_bo {
 	__u64 presumed;       /* in/out, presumed buffer address */
 };
 
+/* Valid submit ioctl flags: */
+/* to start, nothing.. */
+#define MSM_SUBMIT_FLAGS 0
+
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
  * (context-restore), and IB buffers needed for per tile/bin draw cmds.
  */
 struct drm_msm_gem_submit {
-	__u32 pipe;           /* in, MSM_PIPE_x */
+	__u32 flags;          /* MSM_PIPE_x | MSM_SUBMIT_x */
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-- 
cgit v1.2.3


From f0a42bb5423a1387e54a2d3451a10d4358b8cfb6 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Thu, 16 Jun 2016 16:08:19 -0400
Subject: drm/msm: submit support for in-fences

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/Kconfig          |  1 +
 drivers/gpu/drm/msm/msm_gem_submit.c | 34 +++++++++++++++++++++++++++++++---
 include/uapi/drm/msm_drm.h           |  9 +++++++--
 3 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 7c7a0314a756..d96b2b6898a3 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -11,6 +11,7 @@ config DRM_MSM
 	select TMPFS
 	select QCOM_SCM
 	select SND_SOC_HDMI_CODEC if SND_SOC
+	select SYNC_FILE
 	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 0b96d85d99f9..3a4a8ddeb1db 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -15,6 +15,8 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/sync_file.h>
+
 #include "msm_drv.h"
 #include "msm_gpu.h"
 #include "msm_gem.h"
@@ -361,6 +363,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
+	struct fence *in_fence = NULL;
 	unsigned i;
 	int ret;
 
@@ -394,9 +397,32 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = submit_fence_sync(submit);
-	if (ret)
-		goto out;
+	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
+		in_fence = sync_file_get_fence(args->fence_fd);
+
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/* TODO if we get an array-fence due to userspace merging multiple
+		 * fences, we need a way to determine if all the backing fences
+		 * are from our own context..
+		 */
+
+		if (in_fence->context != gpu->fctx->context) {
+			ret = fence_wait(in_fence, true);
+			if (ret)
+				goto out;
+		}
+
+	}
+
+	if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) {
+		ret = submit_fence_sync(submit);
+		if (ret)
+			goto out;
+	}
 
 	ret = submit_pin_objects(submit);
 	if (ret)
@@ -467,6 +493,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	args->fence = submit->fence->seqno;
 
 out:
+	if (in_fence)
+		fence_put(in_fence);
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 913e08cd5ceb..0402994cdbb7 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -185,8 +185,12 @@ struct drm_msm_gem_submit_bo {
 };
 
 /* Valid submit ioctl flags: */
-/* to start, nothing.. */
-#define MSM_SUBMIT_FLAGS 0
+#define MSM_SUBMIT_NO_IMPLICIT   0x80000000 /* disable implicit sync */
+#define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
+#define MSM_SUBMIT_FLAGS                ( \
+		MSM_SUBMIT_NO_IMPLICIT   | \
+		MSM_SUBMIT_FENCE_FD_IN   | \
+		0)
 
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
@@ -199,6 +203,7 @@ struct drm_msm_gem_submit {
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
 	__u64 __user bos;     /* in, ptr to array of submit_bo's */
 	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN) */
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
-- 
cgit v1.2.3


From 4cd0945901a6dd0190824a98471449df9129d21c Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Thu, 16 Jun 2016 16:43:49 -0400
Subject: drm/msm: submit support for out-fences

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 25 +++++++++++++++++++++++++
 include/uapi/drm/msm_drm.h           |  4 +++-
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 65284febb2f9..3ac14cd1e5b9 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -364,6 +364,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
 	struct fence *in_fence = NULL;
+	struct sync_file *sync_file = NULL;
+	int out_fence_fd = -1;
 	unsigned i;
 	int ret;
 
@@ -383,6 +385,14 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
+	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			goto out_unlock;
+		}
+	}
+
 	submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
 	if (!submit) {
 		ret = -ENOMEM;
@@ -495,10 +505,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto out;
 	}
 
+	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+		sync_file = sync_file_create(submit->fence);
+		if (!sync_file) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
 	msm_gpu_submit(gpu, submit, ctx);
 
 	args->fence = submit->fence->seqno;
 
+	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
+		fd_install(out_fence_fd, sync_file->file);
+		args->fence_fd = out_fence_fd;
+	}
+
 out:
 	if (in_fence)
 		fence_put(in_fence);
@@ -506,6 +529,8 @@ out:
 	if (ret)
 		msm_gem_submit_free(submit);
 out_unlock:
+	if (ret && (out_fence_fd >= 0))
+		put_unused_fd(out_fence_fd);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 0402994cdbb7..8c51e8a0df89 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -187,9 +187,11 @@ struct drm_msm_gem_submit_bo {
 /* Valid submit ioctl flags: */
 #define MSM_SUBMIT_NO_IMPLICIT   0x80000000 /* disable implicit sync */
 #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
+#define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
 #define MSM_SUBMIT_FLAGS                ( \
 		MSM_SUBMIT_NO_IMPLICIT   | \
 		MSM_SUBMIT_FENCE_FD_IN   | \
+		MSM_SUBMIT_FENCE_FD_OUT  | \
 		0)
 
 /* Each cmdstream submit consists of a table of buffers involved, and
@@ -203,7 +205,7 @@ struct drm_msm_gem_submit {
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
 	__u64 __user bos;     /* in, ptr to array of submit_bo's */
 	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
-	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN) */
+	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
-- 
cgit v1.2.3


From 86da71b57383d40993cb90baafb3735cffe5d800 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Mon, 12 Sep 2016 20:13:09 -0400
Subject: net_sched: Introduce skbmod action

This action is intended to be an upgrade from a usability perspective
from pedit (as well as operational debugability).
Compare this:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action pedit munge offset -14 u8 set 0x02 \
munge offset -13 u8 set 0x15 \
munge offset -12 u8 set 0x15 \
munge offset -11 u8 set 0x15 \
munge offset -10 u16 set 0x1515 \
pipe

to:

sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \
u32 match ip protocol 1 0xff flowid 1:2 \
action skbmod dmac 02:15:15:15:15:15

Also try to do a MAC address swap with pedit or worse
try to debug a policy with destination mac, source mac and
etherype. Then make few rules out of those and you'll get my point.

In the future common use cases on pedit can be migrated to this action
(as an example different fields in ip v4/6, transports like tcp/udp/sctp
etc). For this first cut, this allows modifying basic ethernet header.

The most important ethernet use case at the moment is when redirecting or
mirroring packets to a remote machine. The dst mac address needs a re-write
so that it doesnt get dropped or confuse an interconnecting (learning) switch
or dropped by a target machine (which looks at the dst mac). And at times
when flipping back the packet a swap of the MAC addresses is needed.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_skbmod.h        |  30 ++++
 include/uapi/linux/tc_act/tc_skbmod.h |  39 +++++
 net/sched/Kconfig                     |  11 ++
 net/sched/Makefile                    |   1 +
 net/sched/act_skbmod.c                | 301 ++++++++++++++++++++++++++++++++++
 5 files changed, 382 insertions(+)
 create mode 100644 include/net/tc_act/tc_skbmod.h
 create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h
 create mode 100644 net/sched/act_skbmod.c

(limited to 'include/uapi')

diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
new file mode 100644
index 000000000000..644a2116b47b
--- /dev/null
+++ b/include/net/tc_act/tc_skbmod.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __NET_TC_SKBMOD_H
+#define __NET_TC_SKBMOD_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_skbmod.h>
+
+struct tcf_skbmod_params {
+	struct rcu_head	rcu;
+	u64	flags; /*up to 64 types of operations; extend if needed */
+	u8	eth_dst[ETH_ALEN];
+	u16	eth_type;
+	u8	eth_src[ETH_ALEN];
+};
+
+struct tcf_skbmod {
+	struct tc_action	common;
+	struct tcf_skbmod_params __rcu *skbmod_p;
+};
+#define to_skbmod(a) ((struct tcf_skbmod *)a)
+
+#endif /* __NET_TC_SKBMOD_H */
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
new file mode 100644
index 000000000000..10fc07da6c69
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __LINUX_TC_SKBMOD_H
+#define __LINUX_TC_SKBMOD_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBMOD 15
+
+#define SKBMOD_F_DMAC	0x1
+#define SKBMOD_F_SMAC	0x2
+#define SKBMOD_F_ETYPE	0x4
+#define SKBMOD_F_SWAPMAC 0x8
+
+struct tc_skbmod {
+	tc_gen;
+	__u64 flags;
+};
+
+enum {
+	TCA_SKBMOD_UNSPEC,
+	TCA_SKBMOD_TM,
+	TCA_SKBMOD_PARMS,
+	TCA_SKBMOD_DMAC,
+	TCA_SKBMOD_SMAC,
+	TCA_SKBMOD_ETYPE,
+	TCA_SKBMOD_PAD,
+	__TCA_SKBMOD_MAX
+};
+#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 72e3426fa48f..7795d5a3f79a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_SKBMOD
+        tristate "skb data modification action"
+        depends on NET_CLS_ACT
+        ---help---
+         Say Y here to allow modification of skb data
+
+         If unsure, say N.
+
+         To compile this code as a module, choose M here: the
+         module will be called act_skbmod.
+
 config NET_ACT_IFE
         tristate "Inter-FE action based on IETF ForCES InterFE LFB"
         depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b9d046b9535a..148ae0d5ac2c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 000000000000..e7d96381c908
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,301 @@
+/*
+ * net/sched/act_skbmod.c  skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK     15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	int action;
+	struct tcf_skbmod_params *p;
+	u64 flags;
+	int err;
+
+	tcf_lastuse_update(&d->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+	/* XXX: if you are going to edit more fields beyond ethernet header
+	 * (example when you add IP header replacement or vlan swap)
+	 * then MAX_EDIT_LEN needs to change appropriately
+	*/
+	err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+	if (unlikely(err)) { /* best policy is to drop on the floor */
+		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+		return TC_ACT_SHOT;
+	}
+
+	rcu_read_lock();
+	action = READ_ONCE(d->tcf_action);
+	if (unlikely(action == TC_ACT_SHOT)) {
+		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+		rcu_read_unlock();
+		return action;
+	}
+
+	p = rcu_dereference(d->skbmod_p);
+	flags = p->flags;
+	if (flags & SKBMOD_F_DMAC)
+		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+	if (flags & SKBMOD_F_SMAC)
+		ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+	if (flags & SKBMOD_F_ETYPE)
+		eth_hdr(skb)->h_proto = p->eth_type;
+	rcu_read_unlock();
+
+	if (flags & SKBMOD_F_SWAPMAC) {
+		u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+		/*XXX: I am sure we can come up with more efficient swapping*/
+		ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+		ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+		ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+	}
+
+	return action;
+}
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+	[TCA_SKBMOD_PARMS]		= { .len = sizeof(struct tc_skbmod) },
+	[TCA_SKBMOD_DMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_SMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_ETYPE]		= { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+	struct tcf_skbmod_params *p, *p_old;
+	struct tc_skbmod *parm;
+	struct tcf_skbmod *d;
+	bool exists = false;
+	u8 *daddr = NULL;
+	u8 *saddr = NULL;
+	u16 eth_type = 0;
+	u32 lflags = 0;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_SKBMOD_PARMS])
+		return -EINVAL;
+
+	if (tb[TCA_SKBMOD_DMAC]) {
+		daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+		lflags |= SKBMOD_F_DMAC;
+	}
+
+	if (tb[TCA_SKBMOD_SMAC]) {
+		saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+		lflags |= SKBMOD_F_SMAC;
+	}
+
+	if (tb[TCA_SKBMOD_ETYPE]) {
+		eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+		lflags |= SKBMOD_F_ETYPE;
+	}
+
+	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+	if (parm->flags & SKBMOD_F_SWAPMAC)
+		lflags = SKBMOD_F_SWAPMAC;
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!lflags)
+		return -EINVAL;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_skbmod_ops, bind, true);
+		if (ret)
+			return ret;
+
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	d = to_skbmod(*a);
+
+	ASSERT_RTNL();
+	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+	if (unlikely(!p)) {
+		if (ovr)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	p->flags = lflags;
+	d->tcf_action = parm->action;
+
+	p_old = rtnl_dereference(d->skbmod_p);
+
+	if (ovr)
+		spin_lock_bh(&d->tcf_lock);
+
+	if (lflags & SKBMOD_F_DMAC)
+		ether_addr_copy(p->eth_dst, daddr);
+	if (lflags & SKBMOD_F_SMAC)
+		ether_addr_copy(p->eth_src, saddr);
+	if (lflags & SKBMOD_F_ETYPE)
+		p->eth_type = htons(eth_type);
+
+	rcu_assign_pointer(d->skbmod_p, p);
+	if (ovr)
+		spin_unlock_bh(&d->tcf_lock);
+
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	struct tcf_skbmod_params  *p;
+
+	p = rcu_dereference_protected(d->skbmod_p, 1);
+	kfree_rcu(p, rcu);
+}
+
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
+	struct tc_skbmod opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	opt.flags  = p->flags;
+	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_DMAC) &&
+	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_SMAC) &&
+	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_ETYPE) &&
+	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &d->tcf_tm);
+	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+nla_put_failure:
+	rcu_read_unlock();
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+	.kind		=	"skbmod",
+	.type		=	TCA_ACT_SKBMOD,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbmod_run,
+	.dump		=	tcf_skbmod_dump,
+	.init		=	tcf_skbmod_init,
+	.cleanup	=	tcf_skbmod_cleanup,
+	.walk		=	tcf_skbmod_walker,
+	.lookup		=	tcf_skbmod_search,
+	.size		=	sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+	.init = skbmod_init_net,
+	.exit = skbmod_exit_net,
+	.id   = &skbmod_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+	return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
-- 
cgit v1.2.3


From aa72d708373dacfa690960b336543b867784b350 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 15 Sep 2016 15:28:22 +0300
Subject: net/sched: cls_flower: Support masking for matching on tcp/udp ports

Add the definitions for src/dst udp/tcp port masks and use
them when setting && dumping the relevant keys.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  4 ++++
 net/sched/cls_flower.c       | 20 ++++++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index f9c287c67eae..60ea2a084880 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -442,6 +442,10 @@ enum {
 	TCA_FLOWER_KEY_ENC_IPV6_DST,	/* struct in6_addr */
 	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
 
+	TCA_FLOWER_KEY_TCP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK,	/* be16 */
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b084b2aab2d7..027523c82797 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -335,6 +335,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
 	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
 	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -432,17 +436,17 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 
 	if (key->basic.ip_proto == IPPROTO_TCP) {
 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
 			       sizeof(key->tp.src));
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
 			       sizeof(key->tp.dst));
 	} else if (key->basic.ip_proto == IPPROTO_UDP) {
 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
 			       sizeof(key->tp.src));
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
 			       sizeof(key->tp.dst));
 	}
 
@@ -877,18 +881,18 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	if (key->basic.ip_proto == IPPROTO_TCP &&
 	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
 			     sizeof(key->tp.src)) ||
 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
 			     sizeof(key->tp.dst))))
 		goto nla_put_failure;
 	else if (key->basic.ip_proto == IPPROTO_UDP &&
 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
 				  sizeof(key->tp.src)) ||
 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
 				  sizeof(key->tp.dst))))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 37a6c1512314d2439ef7136d773d5a470e0996b9 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 15 Sep 2016 15:28:24 +0300
Subject: net/sched: cls_flower: Specify vlan attributes format in the UAPI
 header

Specify the format (size and endianess) for the vlan attributes.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 60ea2a084880..8915b61bbf83 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -428,9 +428,9 @@ enum {
 	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
 
 	TCA_FLOWER_FLAGS,
-	TCA_FLOWER_KEY_VLAN_ID,
-	TCA_FLOWER_KEY_VLAN_PRIO,
-	TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE,	/* be16 */
 
 	TCA_FLOWER_KEY_ENC_KEY_ID,	/* be32 */
 	TCA_FLOWER_KEY_ENC_IPV4_SRC,	/* be32 */
-- 
cgit v1.2.3


From c568d68341be7030f5647def68851e469b21ca11 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 16 Sep 2016 12:44:20 +0200
Subject: locks: fix file locking on overlayfs

This patch allows flock, posix locks, ofd locks and leases to work
correctly on overlayfs.

Instead of using the underlying inode for storing lock context use the
overlay inode.  This allows locks to be persistent across copy-up.

This is done by introducing locks_inode() helper and using it instead of
file_inode() to get the inode in locking code.  For non-overlayfs the two
are equivalent, except for an extra pointer dereference in locks_inode().

Since lock operations are in "struct file_operations" we must also make
sure not to call underlying filesystem's lock operations.  Introcude a
super block flag MS_NOREMOTELOCK to this effect.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Acked-by: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
---
 fs/locks.c              | 50 +++++++++++++++++++++++++++----------------------
 fs/namespace.c          |  2 +-
 fs/open.c               |  2 +-
 fs/overlayfs/super.c    |  2 +-
 include/linux/fs.h      | 16 ++++++++++++++--
 include/uapi/linux/fs.h |  1 +
 6 files changed, 46 insertions(+), 27 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..c1656cff53ee 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,6 +139,11 @@
 #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
 #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
 
+static inline bool is_remote_lock(struct file *filp)
+{
+	return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK));
+}
+
 static bool lease_breaking(struct file_lock *fl)
 {
 	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -791,7 +796,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 {
 	struct file_lock *cfl;
 	struct file_lock_context *ctx;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@ -1192,7 +1197,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return posix_lock_inode(file_inode(filp), fl, conflock);
+	return posix_lock_inode(locks_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1232,7 +1237,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
 int locks_mandatory_locked(struct file *file)
 {
 	int ret;
-	struct inode *inode = file_inode(file);
+	struct inode *inode = locks_inode(file);
 	struct file_lock_context *ctx;
 	struct file_lock *fl;
 
@@ -1572,7 +1577,7 @@ EXPORT_SYMBOL(lease_get_mtime);
 int fcntl_getlease(struct file *filp)
 {
 	struct file_lock *fl;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	int type = F_UNLCK;
 	LIST_HEAD(dispose);
@@ -1580,7 +1585,7 @@ int fcntl_getlease(struct file *filp)
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		spin_lock(&ctx->flc_lock);
-		time_out_leases(file_inode(filp), &dispose);
+		time_out_leases(inode, &dispose);
 		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
 			if (fl->fl_file != filp)
 				continue;
@@ -1628,7 +1633,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 {
 	struct file_lock *fl, *my_fl = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = dentry->d_inode;
 	struct file_lock_context *ctx;
 	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
@@ -1742,7 +1747,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 {
 	int error = -EAGAIN;
 	struct file_lock *fl, *victim = NULL;
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	LIST_HEAD(dispose);
 
@@ -1782,7 +1787,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 			void **priv)
 {
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	int error;
 
 	if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
@@ -1830,7 +1835,7 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
-	if (filp->f_op->setlease)
+	if (filp->f_op->setlease && is_remote_lock(filp))
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
 		return generic_setlease(filp, arg, lease, priv);
@@ -1979,7 +1984,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (error)
 		goto out_free;
 
-	if (f.file->f_op->flock)
+	if (f.file->f_op->flock && is_remote_lock(f.file))
 		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
@@ -2005,7 +2010,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
  */
 int vfs_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, F_GETLK, fl);
 	posix_test_lock(filp, fl);
 	return 0;
@@ -2129,7 +2134,7 @@ out:
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, cmd, fl);
 	else
 		return posix_lock_file(filp, fl, conf);
@@ -2191,7 +2196,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (file_lock == NULL)
 		return -ENOLCK;
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 
 	/*
 	 * This might block, so we do it before checking the inode.
@@ -2343,7 +2348,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -2426,6 +2431,7 @@ out:
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
 	int error;
+	struct inode *inode = locks_inode(filp);
 	struct file_lock lock;
 	struct file_lock_context *ctx;
 
@@ -2434,7 +2440,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	ctx =  smp_load_acquire(&file_inode(filp)->i_flctx);
+	ctx =  smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty(&ctx->flc_posix))
 		return;
 
@@ -2452,7 +2458,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
-	trace_locks_remove_posix(file_inode(filp), &lock, error);
+	trace_locks_remove_posix(inode, &lock, error);
 }
 
 EXPORT_SYMBOL(locks_remove_posix);
@@ -2469,12 +2475,12 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 		.fl_type = F_UNLCK,
 		.fl_end = OFFSET_MAX,
 	};
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 
 	if (list_empty(&flctx->flc_flock))
 		return;
 
-	if (filp->f_op->flock)
+	if (filp->f_op->flock && is_remote_lock(filp))
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 	else
 		flock_lock_inode(inode, &fl);
@@ -2508,7 +2514,7 @@ void locks_remove_file(struct file *filp)
 {
 	struct file_lock_context *ctx;
 
-	ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+	ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
 	if (!ctx)
 		return;
 
@@ -2552,7 +2558,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
  */
 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock)
+	if (filp->f_op->lock && is_remote_lock(filp))
 		return filp->f_op->lock(filp, F_CANCELLK, fl);
 	return 0;
 }
@@ -2580,7 +2586,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
-		inode = file_inode(fl->fl_file);
+		inode = locks_inode(fl->fl_file);
 
 	seq_printf(f, "%lld:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
@@ -2682,7 +2688,7 @@ static void __show_fd_locks(struct seq_file *f,
 void show_fd_locks(struct seq_file *f,
 		  struct file *filp, struct files_struct *files)
 {
-	struct inode *inode = file_inode(filp);
+	struct inode *inode = locks_inode(filp);
 	struct file_lock_context *ctx;
 	int id = 0;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 7bb2cda3bfef..dcd9afe21e62 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2700,7 +2700,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
-		   MS_STRICTATIME);
+		   MS_STRICTATIME | MS_NOREMOTELOCK);
 
 	if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/open.c b/fs/open.c
index 4fd6e256f4f4..648fb9d3e97a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -726,7 +726,7 @@ static int do_dentry_open(struct file *f,
 	if (error)
 		goto cleanup_all;
 
-	error = break_lease(inode, f->f_flags);
+	error = break_lease(locks_inode(f), f->f_flags);
 	if (error)
 		goto cleanup_all;
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index e2a94a26767b..3d0b9dee2b76 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1320,7 +1320,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_root = root_dentry;
 	sb->s_fs_info = ufs;
-	sb->s_flags |= MS_POSIXACL;
+	sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
 	return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7db097d673a8..8ee0f011547f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1065,6 +1065,18 @@ struct file_lock_context {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
+/*
+ * Return the inode to use for locking
+ *
+ * For overlayfs this should be the overlay inode, not the real inode returned
+ * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
+ * equal.
+ */
+static inline struct inode *locks_inode(const struct file *f)
+{
+	return f->f_path.dentry->d_inode;
+}
+
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
@@ -1252,7 +1264,7 @@ static inline struct dentry *file_dentry(const struct file *file)
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
 {
-	return locks_lock_inode_wait(file_inode(filp), fl);
+	return locks_lock_inode_wait(locks_inode(filp), fl);
 }
 
 struct fasync_struct {
@@ -2155,7 +2167,7 @@ static inline int mandatory_lock(struct inode *ino)
 
 static inline int locks_verify_locked(struct file *file)
 {
-	if (mandatory_lock(file_inode(file)))
+	if (mandatory_lock(locks_inode(file)))
 		return locks_mandatory_locked(file);
 	return 0;
 }
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3b00f7c8943f..2473272169f2 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -132,6 +132,7 @@ struct inodes_stat_t {
 #define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define MS_NOREMOTELOCK	(1<<27)
 #define MS_NOSEC	(1<<28)
 #define MS_BORN		(1<<29)
 #define MS_ACTIVE	(1<<30)
-- 
cgit v1.2.3


From cfc7381b3002756b1dcada32979e942aa3126e31 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 15 Sep 2016 13:00:29 -0700
Subject: ip_tunnel: add collect_md mode to IPIP tunnel

Similar to gre, vxlan, geneve tunnels allow IPIP tunnels to
operate in 'collect metadata' mode.
bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away.
ovs can use it as well in the future (once appropriate ovs-vport
abstractions and user apis are added).
Note that just like in other tunnels we cannot cache the dst,
since tunnel_info metadata can be different for every packet.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h       |  2 ++
 include/uapi/linux/if_tunnel.h |  1 +
 net/ipv4/ip_tunnel.c           | 76 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ipip.c                | 35 +++++++++++++++----
 4 files changed, 108 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index e598c639aa6f..59557c07904b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
 
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		       const u8 proto);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 9865c8caedde..18d5dc13985d 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -73,6 +73,7 @@ enum {
 	IFLA_IPTUN_ENCAP_FLAGS,
 	IFLA_IPTUN_ENCAP_SPORT,
 	IFLA_IPTUN_ENCAP_DPORT,
+	IFLA_IPTUN_COLLECT_METADATA,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 95649ebd2874..5719d6ba0824 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/udp.h>
+#include <net/dst_metadata.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	u32 headroom = sizeof(struct iphdr);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	const struct iphdr *inner_iph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+	__be16 df = 0;
+	u8 tos, ttl;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto tx_error;
+	key = &tun_info->key;
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+	tos = key->tos;
+	if (tos == 1) {
+		if (skb->protocol == htons(ETH_P_IP))
+			tos = inner_iph->tos;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+	}
+	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+			 RT_TOS(tos), tunnel->parms.link);
+	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
+		goto tx_error;
+	rt = ip_route_output_key(tunnel->net, &fl4);
+	if (IS_ERR(rt)) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error;
+	}
+	if (rt->dst.dev == dev) {
+		ip_rt_put(rt);
+		dev->stats.collisions++;
+		goto tx_error;
+	}
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+	ttl = key->ttl;
+	if (ttl == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			ttl = inner_iph->ttl;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+		else
+			ttl = ip4_dst_hoplimit(&rt->dst);
+	}
+	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		df = htons(IP_DF);
+	else if (skb->protocol == htons(ETH_P_IP))
+		df = inner_iph->frag_off & htons(IP_DF);
+	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+	if (headroom > dev->needed_headroom)
+		dev->needed_headroom = headroom;
+
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		ip_rt_put(rt);
+		goto tx_dropped;
+	}
+	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
+		      key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+	return;
+tx_error:
+	dev->stats.tx_errors++;
+	goto kfree;
+tx_dropped:
+	dev->stats.tx_dropped++;
+kfree:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
+
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, u8 protocol)
 {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4ae3f8e6c6cc..c9392589c415 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,6 +115,7 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/dst_metadata.h>
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+	struct metadata_dst *tun_dst = NULL;
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph;
 
@@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 			tpi = &ipip_tpi;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		if (tunnel->collect_md) {
+			tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+			if (!tun_dst)
+				return 0;
+		}
+		return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 	}
 
 	return -1;
@@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
 
 	skb_set_inner_ipproto(skb, ipproto);
 
-	ip_tunnel_xmit(skb, dev, tiph, ipproto);
+	if (tunnel->collect_md)
+		ip_md_tunnel_xmit(skb, dev, ipproto);
+	else
+		ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
 
 tx_error:
@@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 }
 
 static void ipip_netlink_parms(struct nlattr *data[],
-			       struct ip_tunnel_parm *parms)
+			       struct ip_tunnel_parm *parms, bool *collect_md)
 {
 	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.version = 4;
 	parms->iph.protocol = IPPROTO_IPIP;
 	parms->iph.ihl = 5;
+	*collect_md = false;
 
 	if (!data)
 		return;
@@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
 
 	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 		parms->iph.frag_off = htons(IP_DF);
+
+	if (data[IFLA_IPTUN_COLLECT_METADATA])
+		*collect_md = true;
 }
 
 /* This function returns true when ENCAP attributes are present in the nl msg */
@@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
-		struct ip_tunnel *t = netdev_priv(dev);
 		int err = ip_tunnel_encap_setup(t, &ipencap);
 
 		if (err < 0)
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &t->collect_md);
 	return ip_tunnel_newlink(dev, tb, &p);
 }
 
@@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 {
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
+	bool collect_md;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
 		struct ip_tunnel *t = netdev_priv(dev);
@@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &collect_md);
+	if (collect_md)
+		return -EINVAL;
 
 	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev)
 		nla_total_size(2) +
 		/* IFLA_IPTUN_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_IPTUN_COLLECT_METADATA */
+		nla_total_size(0) +
 		0;
 }
 
@@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			tunnel->encap.flags))
 		goto nla_put_failure;
 
+	if (tunnel->collect_md)
+		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
-- 
cgit v1.2.3


From 69ae6ad2ff37911903a90256e216d7e7ae460002 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Fri, 16 Sep 2016 15:05:37 +0200
Subject: net: core: Add offload stats to if_stats_msg

Add a nested attribute of offload stats to if_stats_msg
named IFLA_STATS_LINK_OFFLOAD_XSTATS.
Under it, add SW stats, meaning stats only per packets that went via
slowpath to the cpu, named IFLA_OFFLOAD_XSTATS_CPU_HIT.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |   9 ++++
 net/core/rtnetlink.c         | 111 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 116 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9bf3aecfe05b..2351776a724f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -826,6 +826,7 @@ enum {
 	IFLA_STATS_LINK_64,
 	IFLA_STATS_LINK_XSTATS,
 	IFLA_STATS_LINK_XSTATS_SLAVE,
+	IFLA_STATS_LINK_OFFLOAD_XSTATS,
 	__IFLA_STATS_MAX,
 };
 
@@ -845,6 +846,14 @@ enum {
 };
 #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
 
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+	IFLA_OFFLOAD_XSTATS_UNSPEC,
+	IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+	__IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
 /* XDP section */
 
 enum {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 937e459bdaa9..0dbae4244a89 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3577,6 +3577,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
 	       (!idxattr || idxattr == attrid);
 }
 
+#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
+static int rtnl_get_offload_stats_attr_size(int attr_id)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return sizeof(struct rtnl_link_stats64);
+	}
+
+	return 0;
+}
+
+static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
+				  int *prividx)
+{
+	struct nlattr *attr = NULL;
+	int attr_id, size;
+	void *attr_data;
+	int err;
+
+	if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+	      dev->netdev_ops->ndo_get_offload_stats))
+		return -ENODATA;
+
+	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+		if (attr_id < *prividx)
+			continue;
+
+		size = rtnl_get_offload_stats_attr_size(attr_id);
+		if (!size)
+			continue;
+
+		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+			continue;
+
+		attr = nla_reserve_64bit(skb, attr_id, size,
+					 IFLA_OFFLOAD_XSTATS_UNSPEC);
+		if (!attr)
+			goto nla_put_failure;
+
+		attr_data = nla_data(attr);
+		memset(attr_data, 0, size);
+		err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
+							     attr_data);
+		if (err)
+			goto get_offload_stats_failure;
+	}
+
+	if (!attr)
+		return -ENODATA;
+
+	*prividx = 0;
+	return 0;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+get_offload_stats_failure:
+	*prividx = attr_id;
+	return err;
+}
+
+static int rtnl_get_offload_stats_size(const struct net_device *dev)
+{
+	int nla_size = 0;
+	int attr_id;
+	int size;
+
+	if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+	      dev->netdev_ops->ndo_get_offload_stats))
+		return 0;
+
+	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+			continue;
+		size = rtnl_get_offload_stats_attr_size(attr_id);
+		nla_size += nla_total_size_64bit(size);
+	}
+
+	if (nla_size != 0)
+		nla_size += nla_total_size(0);
+
+	return nla_size;
+}
+
 static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 			       int type, u32 pid, u32 seq, u32 change,
 			       unsigned int flags, unsigned int filter_mask,
@@ -3586,6 +3671,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 	struct nlmsghdr *nlh;
 	struct nlattr *attr;
 	int s_prividx = *prividx;
+	int err;
 
 	ASSERT_RTNL();
 
@@ -3614,8 +3700,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
 
 		if (ops && ops->fill_linkxstats) {
-			int err;
-
 			*idxattr = IFLA_STATS_LINK_XSTATS;
 			attr = nla_nest_start(skb,
 					      IFLA_STATS_LINK_XSTATS);
@@ -3639,8 +3723,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		if (master)
 			ops = master->rtnl_link_ops;
 		if (ops && ops->fill_linkxstats) {
-			int err;
-
 			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
 			attr = nla_nest_start(skb,
 					      IFLA_STATS_LINK_XSTATS_SLAVE);
@@ -3655,6 +3737,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		}
 	}
 
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
+			     *idxattr)) {
+		*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
+		attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS);
+		if (!attr)
+			goto nla_put_failure;
+
+		err = rtnl_get_offload_stats(skb, dev, prividx);
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, attr);
+		else
+			nla_nest_end(skb, attr);
+
+		if (err && err != -ENODATA)
+			goto nla_put_failure;
+		*idxattr = 0;
+	}
+
 	nlmsg_end(skb, nlh);
 
 	return 0;
@@ -3708,6 +3808,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 		}
 	}
 
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
+		size += rtnl_get_offload_stats_size(dev);
+
 	return size;
 }
 
-- 
cgit v1.2.3


From 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Fri, 16 Sep 2016 12:59:19 -0700
Subject: ipvlan: Introduce l3s mode

In a typical IPvlan L3 setup where master is in default-ns and
each slave is into different (slave) ns. In this setup egress
packet processing for traffic originating from slave-ns will
hit all NF_HOOKs in slave-ns as well as default-ns. However same
is not true for ingress processing. All these NF_HOOKs are
hit only in the slave-ns skipping them in the default-ns.
IPvlan in L3 mode is restrictive and if admins want to deploy
iptables rules in default-ns, this asymmetric data path makes it
impossible to do so.

This patch makes use of the l3_rcv() (added as part of l3mdev
enhancements) to perform input route lookup on RX packets without
changing the skb->dev and then uses nf_hook at NF_INET_LOCAL_IN
to change the skb->dev just before handing over skb to L4.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
CC: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ipvlan.txt |  7 ++-
 drivers/net/Kconfig                 |  1 +
 drivers/net/ipvlan/ipvlan.h         |  6 +++
 drivers/net/ipvlan/ipvlan_core.c    | 94 +++++++++++++++++++++++++++++++++++++
 drivers/net/ipvlan/ipvlan_main.c    | 87 +++++++++++++++++++++++++++++++---
 include/uapi/linux/if_link.h        |  1 +
 6 files changed, 188 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 14422f8fcdc4..24196cef7c91 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
 	There are no module parameters for this driver and it can be configured
 using IProute2/ip utility.
 
-	ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 }
+	ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
 
 	e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
 
@@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be
 used before packets are queued on the outbound device. In this mode the slaves
 will not receive nor can send multicast / broadcast traffic.
 
+4.3 L3S mode:
+	This is very similar to the L3 mode except that iptables (conn-tracking)
+works in this mode and hence it is L3-symmetric (L3s). This will have slightly less
+performance but that shouldn't matter since you are choosing this mode over plain-L3
+mode to make conn-tracking work.
 
 5. What to choose (macvlan vs. ipvlan)?
 	These two devices are very similar in many regards and the specific use
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c5415b05ea9..8768a625350d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
     depends on IPV6
+    depends on NET_L3_MASTER_DEV
     ---help---
       This allows one to create virtual devices off of a main interface
       and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 695a5dc9ace3..7e0732f5ea07 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -23,11 +23,13 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/inetdevice.h>
+#include <linux/netfilter.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
 #include <net/rtnetlink.h>
 #include <net/route.h>
 #include <net/addrconf.h>
+#include <net/l3mdev.h>
 
 #define IPVLAN_DRV	"ipvlan"
 #define IPV_DRV_VER	"0.1"
@@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6);
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
 void ipvlan_ht_addr_del(struct ipvl_addr *addr);
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+			      u16 proto);
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+			     const struct nf_hook_state *state);
 #endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b5f9511d819e..b4e990743e1d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	case IPVLAN_MODE_L2:
 		return ipvlan_xmit_mode_l2(skb, dev);
 	case IPVLAN_MODE_L3:
+	case IPVLAN_MODE_L3S:
 		return ipvlan_xmit_mode_l3(skb, dev);
 	}
 
@@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 		return ipvlan_handle_mode_l2(pskb, port);
 	case IPVLAN_MODE_L3:
 		return ipvlan_handle_mode_l3(pskb, port);
+	case IPVLAN_MODE_L3S:
+		return RX_HANDLER_PASS;
 	}
 
 	/* Should not reach here */
@@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 	kfree_skb(skb);
 	return RX_HANDLER_CONSUMED;
 }
+
+static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
+					    struct net_device *dev)
+{
+	struct ipvl_addr *addr = NULL;
+	struct ipvl_port *port;
+	void *lyr3h;
+	int addr_type;
+
+	if (!dev || !netif_is_ipvlan_port(dev))
+		goto out;
+
+	port = ipvlan_port_get_rcu(dev);
+	if (!port || port->mode != IPVLAN_MODE_L3S)
+		goto out;
+
+	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+out:
+	return addr;
+}
+
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+			      u16 proto)
+{
+	struct ipvl_addr *addr;
+	struct net_device *sdev;
+
+	addr = ipvlan_skb_to_addr(skb, dev);
+	if (!addr)
+		goto out;
+
+	sdev = addr->master->dev;
+	switch (proto) {
+	case AF_INET:
+	{
+		int err;
+		struct iphdr *ip4h = ip_hdr(skb);
+
+		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
+					   ip4h->tos, sdev);
+		if (unlikely(err))
+			goto out;
+		break;
+	}
+	case AF_INET6:
+	{
+		struct dst_entry *dst;
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		int flags = RT6_LOOKUP_F_HAS_SADDR;
+		struct flowi6 fl6 = {
+			.flowi6_iif   = sdev->ifindex,
+			.daddr        = ip6h->daddr,
+			.saddr        = ip6h->saddr,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_mark  = skb->mark,
+			.flowi6_proto = ip6h->nexthdr,
+		};
+
+		skb_dst_drop(skb);
+		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+		skb_dst_set(skb, dst);
+		break;
+	}
+	default:
+		break;
+	}
+
+out:
+	return skb;
+}
+
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+			     const struct nf_hook_state *state)
+{
+	struct ipvl_addr *addr;
+	unsigned int len;
+
+	addr = ipvlan_skb_to_addr(skb, skb->dev);
+	if (!addr)
+		goto out;
+
+	skb->dev = addr->master->dev;
+	len = skb->len + ETH_HLEN;
+	ipvlan_count_rx(addr->master, len, true, false);
+out:
+	return NF_ACCEPT;
+}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18b4e8c7f68a..f442eb366863 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -9,24 +9,87 @@
 
 #include "ipvlan.h"
 
+static u32 ipvl_nf_hook_refcnt = 0;
+
+static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV4,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV6,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+};
+
+static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
+	.l3mdev_l3_rcv = ipvlan_l3_rcv,
+};
+
 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
 {
 	ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
 }
 
-static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
+static int ipvlan_register_nf_hook(void)
+{
+	int err = 0;
+
+	if (!ipvl_nf_hook_refcnt) {
+		err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+		if (!err)
+			ipvl_nf_hook_refcnt = 1;
+	} else {
+		ipvl_nf_hook_refcnt++;
+	}
+
+	return err;
+}
+
+static void ipvlan_unregister_nf_hook(void)
+{
+	WARN_ON(!ipvl_nf_hook_refcnt);
+
+	ipvl_nf_hook_refcnt--;
+	if (!ipvl_nf_hook_refcnt)
+		_nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+}
+
+static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
 {
 	struct ipvl_dev *ipvlan;
+	struct net_device *mdev = port->dev;
+	int err = 0;
 
+	ASSERT_RTNL();
 	if (port->mode != nval) {
+		if (nval == IPVLAN_MODE_L3S) {
+			/* New mode is L3S */
+			err = ipvlan_register_nf_hook();
+			if (!err) {
+				mdev->l3mdev_ops = &ipvl_l3mdev_ops;
+				mdev->priv_flags |= IFF_L3MDEV_MASTER;
+			} else
+				return err;
+		} else if (port->mode == IPVLAN_MODE_L3S) {
+			/* Old mode was L3S */
+			mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
+			ipvlan_unregister_nf_hook();
+			mdev->l3mdev_ops = NULL;
+		}
 		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-			if (nval == IPVLAN_MODE_L3)
+			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
 				ipvlan->dev->flags |= IFF_NOARP;
 			else
 				ipvlan->dev->flags &= ~IFF_NOARP;
 		}
 		port->mode = nval;
 	}
+	return err;
 }
 
 static int ipvlan_port_create(struct net_device *dev)
@@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
 	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
 
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
+	if (port->mode == IPVLAN_MODE_L3S) {
+		dev->priv_flags &= ~IFF_L3MDEV_MASTER;
+		ipvlan_unregister_nf_hook();
+		dev->l3mdev_ops = NULL;
+	}
 	netdev_rx_handler_unregister(dev);
 	cancel_work_sync(&port->wq);
 	__skb_queue_purge(&port->backlog);
@@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev)
 	struct net_device *phy_dev = ipvlan->phy_dev;
 	struct ipvl_addr *addr;
 
-	if (ipvlan->port->mode == IPVLAN_MODE_L3)
+	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
+	    ipvlan->port->mode == IPVLAN_MODE_L3S)
 		dev->flags |= IFF_NOARP;
 	else
 		dev->flags &= ~IFF_NOARP;
@@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev,
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+	int err = 0;
 
 	if (data && data[IFLA_IPVLAN_MODE]) {
 		u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 
-		ipvlan_set_port_mode(port, nmode);
+		err = ipvlan_set_port_mode(port, nmode);
 	}
-	return 0;
+	return err;
 }
 
 static size_t ipvlan_nl_getsize(const struct net_device *dev)
@@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 		unregister_netdevice(dev);
 		return err;
 	}
+	err = ipvlan_set_port_mode(port, mode);
+	if (err) {
+		unregister_netdevice(dev);
+		return err;
+	}
 
 	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
-	ipvlan_set_port_mode(port, mode);
-
 	netif_stacked_transfer_operstate(phy_dev, dev);
 	return 0;
 }
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2351776a724f..7ec9e99d5491 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -464,6 +464,7 @@ enum {
 enum ipvlan_mode {
 	IPVLAN_MODE_L2 = 0,
 	IPVLAN_MODE_L3,
+	IPVLAN_MODE_L3S,
 	IPVLAN_MODE_MAX
 };
 
-- 
cgit v1.2.3


From 7389e6ef347443ac90116c2208bbdfb4f9d135ba Mon Sep 17 00:00:00 2001
From: Charles-Antoine Couret <charles-antoine.couret@nexvision.fr>
Date: Thu, 15 Sep 2016 12:29:50 -0300
Subject: [media] SDI: add flag for SDI formats and SMPTE 125M definition

Adding others generic flags, which could be used by many
components like GS1662.

Signed-off-by: Charles-Antoine Couret <charles-antoine.couret@nexvision.fr>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/v4l2-core/v4l2-dv-timings.c | 11 +++++++----
 include/uapi/linux/v4l2-dv-timings.h      | 12 ++++++++++++
 include/uapi/linux/videodev2.h            |  8 ++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c
index 889de0a32152..730a7c392c1d 100644
--- a/drivers/media/v4l2-core/v4l2-dv-timings.c
+++ b/drivers/media/v4l2-core/v4l2-dv-timings.c
@@ -306,7 +306,7 @@ void v4l2_print_dv_timings(const char *dev_prefix, const char *prefix,
 			(bt->polarities & V4L2_DV_VSYNC_POS_POL) ? "+" : "-",
 			bt->il_vsync, bt->il_vbackporch);
 	pr_info("%s: pixelclock: %llu\n", dev_prefix, bt->pixelclock);
-	pr_info("%s: flags (0x%x):%s%s%s%s%s%s\n", dev_prefix, bt->flags,
+	pr_info("%s: flags (0x%x):%s%s%s%s%s%s%s\n", dev_prefix, bt->flags,
 			(bt->flags & V4L2_DV_FL_REDUCED_BLANKING) ?
 			" REDUCED_BLANKING" : "",
 			((bt->flags & V4L2_DV_FL_REDUCED_BLANKING) &&
@@ -318,12 +318,15 @@ void v4l2_print_dv_timings(const char *dev_prefix, const char *prefix,
 			(bt->flags & V4L2_DV_FL_HALF_LINE) ?
 			" HALF_LINE" : "",
 			(bt->flags & V4L2_DV_FL_IS_CE_VIDEO) ?
-			" CE_VIDEO" : "");
-	pr_info("%s: standards (0x%x):%s%s%s%s\n", dev_prefix, bt->standards,
+			" CE_VIDEO" : "",
+			(bt->flags & V4L2_DV_FL_FIRST_FIELD_EXTRA_LINE) ?
+			" FIRST_FIELD_EXTRA_LINE" : "");
+	pr_info("%s: standards (0x%x):%s%s%s%s%s\n", dev_prefix, bt->standards,
 			(bt->standards & V4L2_DV_BT_STD_CEA861) ?  " CEA" : "",
 			(bt->standards & V4L2_DV_BT_STD_DMT) ?  " DMT" : "",
 			(bt->standards & V4L2_DV_BT_STD_CVT) ?  " CVT" : "",
-			(bt->standards & V4L2_DV_BT_STD_GTF) ?  " GTF" : "");
+			(bt->standards & V4L2_DV_BT_STD_GTF) ?  " GTF" : "",
+			(bt->standards & V4L2_DV_BT_STD_SDI) ?  " SDI" : "");
 }
 EXPORT_SYMBOL_GPL(v4l2_print_dv_timings);
 
diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index 086168e18ca8..f31957166337 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -934,4 +934,16 @@
 		V4L2_DV_FL_REDUCED_BLANKING) \
 }
 
+/* SDI timings definitions */
+
+/* SMPTE-125M */
+#define V4L2_DV_BT_SDI_720X487I60 { \
+	.type = V4L2_DV_BT_656_1120, \
+	V4L2_INIT_BT_TIMINGS(720, 487, 1, \
+		V4L2_DV_HSYNC_POS_POL, \
+		13500000, 16, 121, 0, 0, 19, 0, 0, 19, 0, \
+		V4L2_DV_BT_STD_SDI, \
+		V4L2_DV_FL_FIRST_FIELD_EXTRA_LINE) \
+}
+
 #endif
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 5529741202b5..94f123f3e04e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1274,6 +1274,7 @@ struct v4l2_bt_timings {
 #define V4L2_DV_BT_STD_DMT	(1 << 1)  /* VESA Discrete Monitor Timings */
 #define V4L2_DV_BT_STD_CVT	(1 << 2)  /* VESA Coordinated Video Timings */
 #define V4L2_DV_BT_STD_GTF	(1 << 3)  /* VESA Generalized Timings Formula */
+#define V4L2_DV_BT_STD_SDI	(1 << 4)  /* SDI Timings */
 
 /* Flags */
 
@@ -1305,6 +1306,11 @@ struct v4l2_bt_timings {
  * use the range 16-235) as opposed to 0-255. All formats defined in CEA-861
  * except for the 640x480 format are CE formats. */
 #define V4L2_DV_FL_IS_CE_VIDEO			(1 << 4)
+/* Some formats like SMPTE-125M have an interlaced signal with a odd
+ * total height. For these formats, if this flag is set, the first
+ * field has the extra line. If not, it is the second field.
+ */
+#define V4L2_DV_FL_FIRST_FIELD_EXTRA_LINE		(1 << 5)
 
 /* A few useful defines to calculate the total blanking and frame sizes */
 #define V4L2_DV_BT_BLANKING_WIDTH(bt) \
@@ -1429,6 +1435,8 @@ struct v4l2_input {
 /* field 'status' - analog */
 #define V4L2_IN_ST_NO_H_LOCK   0x00000100  /* No horizontal sync lock */
 #define V4L2_IN_ST_COLOR_KILL  0x00000200  /* Color killer is active */
+#define V4L2_IN_ST_NO_V_LOCK   0x00000400  /* No vertical sync lock */
+#define V4L2_IN_ST_NO_STD_LOCK 0x00000800  /* No standard format lock */
 
 /* field 'status' - digital */
 #define V4L2_IN_ST_NO_SYNC     0x00010000  /* No synchronization lock */
-- 
cgit v1.2.3


From a16039cbf1a1ee7e76d7d100f9e613998919ab91 Mon Sep 17 00:00:00 2001
From: Andrej Krutak <dev@andree.sk>
Date: Sun, 18 Sep 2016 20:59:32 +0200
Subject: ALSA: line6: Add hwdep interface to access the POD control messages

We must do it this way, because e.g. POD X3 won't play any sound unless
the host listens on the bulk EP, so we cannot export it only via libusb.

The driver currently doesn't use the bulk EP messages in other way,
in future it could e.g. sense/modify volume(s).

Signed-off-by: Andrej Krutak <dev@andree.sk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/asound.h |   3 +-
 sound/usb/line6/driver.c    | 152 ++++++++++++++++++++++++++++++++++++++++++--
 sound/usb/line6/driver.h    |  23 ++++++-
 3 files changed, 171 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 609cadb8739d..be353a78c303 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -106,9 +106,10 @@ enum {
 	SNDRV_HWDEP_IFACE_FW_OXFW,	/* Oxford OXFW970/971 based device */
 	SNDRV_HWDEP_IFACE_FW_DIGI00X,	/* Digidesign Digi 002/003 family */
 	SNDRV_HWDEP_IFACE_FW_TASCAM,	/* TASCAM FireWire series */
+	SNDRV_HWDEP_IFACE_LINE6,	/* Line6 USB processors */
 
 	/* Don't forget to change the following: */
-	SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_TASCAM
+	SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_LINE6
 };
 
 struct snd_hwdep_info {
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index 8a71d45ce945..f9224ec141c8 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -17,6 +17,7 @@
 
 #include <sound/core.h>
 #include <sound/initval.h>
+#include <sound/hwdep.h>
 
 #include "capture.h"
 #include "driver.h"
@@ -303,7 +304,7 @@ static void line6_data_received(struct urb *urb)
 		for (;;) {
 			done =
 				line6_midibuf_read(mb, line6->buffer_message,
-						LINE6_MESSAGE_MAXLEN);
+						LINE6_MIDI_MESSAGE_MAXLEN);
 
 			if (done == 0)
 				break;
@@ -315,8 +316,11 @@ static void line6_data_received(struct urb *urb)
 				line6->process_message(line6);
 		}
 	} else {
+		line6->buffer_message = urb->transfer_buffer;
+		line6->message_length = urb->actual_length;
 		if (line6->process_message)
 			line6->process_message(line6);
+		line6->buffer_message = NULL;
 	}
 
 	line6_start_listen(line6);
@@ -473,14 +477,17 @@ static void line6_destruct(struct snd_card *card)
 	struct usb_line6 *line6 = card->private_data;
 	struct usb_device *usbdev = line6->usbdev;
 
-	/* free buffer memory first: */
-	if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI)
+	/* Free buffer memory first. We cannot depend on the existence of private
+	 * data from the (podhd) module, it may be gone already during this call
+	 */
+	if (line6->buffer_message)
 		kfree(line6->buffer_message);
 
 	kfree(line6->buffer_listen);
 
 	/* then free URBs: */
 	usb_free_urb(line6->urb_listen);
+	line6->urb_listen = NULL;
 
 	/* decrement reference counters: */
 	usb_put_dev(usbdev);
@@ -522,6 +529,138 @@ static void line6_get_interval(struct usb_line6 *line6)
 	}
 }
 
+
+/* Enable buffering of incoming messages, flush the buffer */
+static int line6_hwdep_open(struct snd_hwdep *hw, struct file *file)
+{
+	struct usb_line6 *line6 = hw->private_data;
+
+	/* NOTE: hwdep layer provides atomicity here */
+
+	line6->messages.active = 1;
+
+	return 0;
+}
+
+/* Stop buffering */
+static int line6_hwdep_release(struct snd_hwdep *hw, struct file *file)
+{
+	struct usb_line6 *line6 = hw->private_data;
+
+	line6->messages.active = 0;
+
+	return 0;
+}
+
+/* Read from circular buffer, return to user */
+static long
+line6_hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
+					loff_t *offset)
+{
+	struct usb_line6 *line6 = hwdep->private_data;
+	long rv = 0;
+	unsigned int out_count;
+
+	if (mutex_lock_interruptible(&line6->messages.read_lock))
+		return -ERESTARTSYS;
+
+	while (kfifo_len(&line6->messages.fifo) == 0) {
+		mutex_unlock(&line6->messages.read_lock);
+
+		rv = wait_event_interruptible(
+			line6->messages.wait_queue,
+			kfifo_len(&line6->messages.fifo) != 0);
+		if (rv < 0)
+			return rv;
+
+		if (mutex_lock_interruptible(&line6->messages.read_lock))
+			return -ERESTARTSYS;
+	}
+
+	if (kfifo_peek_len(&line6->messages.fifo) > count) {
+		/* Buffer too small; allow re-read of the current item... */
+		rv = -EINVAL;
+	} else {
+		rv = kfifo_to_user(&line6->messages.fifo, buf, count, &out_count);
+		if (rv == 0)
+			rv = out_count;
+	}
+
+	mutex_unlock(&line6->messages.read_lock);
+	return rv;
+}
+
+/* Write directly (no buffering) to device by user*/
+static long
+line6_hwdep_write(struct snd_hwdep *hwdep, const char __user *data, long count,
+					loff_t *offset)
+{
+	struct usb_line6 *line6 = hwdep->private_data;
+	int rv;
+	char *data_copy;
+
+	if (count > line6->max_packet_size * LINE6_RAW_MESSAGES_MAXCOUNT) {
+		/* This is an arbitrary limit - still better than nothing... */
+		return -EINVAL;
+	}
+
+	data_copy = memdup_user(data, count);
+	if (IS_ERR(ERR_PTR))
+		return -ENOMEM;
+
+	rv = line6_send_raw_message(line6, data_copy, count);
+
+	kfree(data_copy);
+	return rv;
+}
+
+static const struct snd_hwdep_ops hwdep_ops = {
+	.open    = line6_hwdep_open,
+	.release = line6_hwdep_release,
+	.read    = line6_hwdep_read,
+	.write   = line6_hwdep_write,
+};
+
+/* Insert into circular buffer */
+static void line6_hwdep_push_message(struct usb_line6 *line6)
+{
+	if (!line6->messages.active)
+		return;
+
+	if (kfifo_avail(&line6->messages.fifo) >= line6->message_length) {
+		/* No race condition here, there's only one writer */
+		kfifo_in(&line6->messages.fifo,
+			line6->buffer_message, line6->message_length);
+	} /* else TODO: signal overflow */
+
+	wake_up_interruptible(&line6->messages.wait_queue);
+}
+
+static int line6_hwdep_init(struct usb_line6 *line6)
+{
+	int err;
+	struct snd_hwdep *hwdep;
+
+	/* TODO: usb_driver_claim_interface(); */
+	line6->process_message = line6_hwdep_push_message;
+	line6->messages.active = 0;
+	init_waitqueue_head(&line6->messages.wait_queue);
+	mutex_init(&line6->messages.read_lock);
+	INIT_KFIFO(line6->messages.fifo);
+
+	err = snd_hwdep_new(line6->card, "config", 0, &hwdep);
+	if (err < 0)
+		goto end;
+	strcpy(hwdep->name, "config");
+	hwdep->iface = SNDRV_HWDEP_IFACE_LINE6;
+	hwdep->ops = hwdep_ops;
+	hwdep->private_data = line6;
+	hwdep->exclusive = true;
+
+end:
+	return err;
+}
+
 static int line6_init_cap_control(struct usb_line6 *line6)
 {
 	int ret;
@@ -536,9 +675,13 @@ static int line6_init_cap_control(struct usb_line6 *line6)
 		return -ENOMEM;
 
 	if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) {
-		line6->buffer_message = kmalloc(LINE6_MESSAGE_MAXLEN, GFP_KERNEL);
+		line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
 		if (!line6->buffer_message)
 			return -ENOMEM;
+	} else {
+		ret = line6_hwdep_init(line6);
+		if (ret < 0)
+			return ret;
 	}
 
 	ret = line6_start_listen(line6);
@@ -716,3 +859,4 @@ EXPORT_SYMBOL_GPL(line6_resume);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
+
diff --git a/sound/usb/line6/driver.h b/sound/usb/line6/driver.h
index 88cf1e750060..7e3a3aada222 100644
--- a/sound/usb/line6/driver.h
+++ b/sound/usb/line6/driver.h
@@ -12,8 +12,9 @@
 #ifndef DRIVER_H
 #define DRIVER_H
 
-#include <linux/spinlock.h>
 #include <linux/usb.h>
+#include <linux/mutex.h>
+#include <linux/kfifo.h>
 #include <sound/core.h>
 
 #include "midi.h"
@@ -32,7 +33,16 @@
 
 #define LINE6_TIMEOUT 1
 #define LINE6_BUFSIZE_LISTEN 64
-#define LINE6_MESSAGE_MAXLEN 256
+#define LINE6_MIDI_MESSAGE_MAXLEN 256
+
+#define LINE6_RAW_MESSAGES_MAXCOUNT_ORDER 7
+/* 4k packets are common, BUFSIZE * MAXCOUNT should be bigger... */
+#define LINE6_RAW_MESSAGES_MAXCOUNT (1 << LINE6_RAW_MESSAGES_MAXCOUNT_ORDER)
+
+
+#if LINE6_BUFSIZE_LISTEN > 65535
+#error "Use dynamic fifo instead"
+#endif
 
 /*
 	Line 6 MIDI control commands
@@ -156,6 +166,15 @@ struct usb_line6 {
 	/* Length of message to be processed, generated from MIDI layer  */
 	int message_length;
 
+	/* Circular buffer for non-MIDI control messages */
+	struct {
+		struct mutex read_lock;
+		wait_queue_head_t wait_queue;
+		unsigned int active:1;
+		STRUCT_KFIFO_REC_2(LINE6_BUFSIZE_LISTEN * LINE6_RAW_MESSAGES_MAXCOUNT)
+			fifo;
+	} messages;
+
 	/* If MIDI is supported, buffer_message contains the pre-processed data;
 	 * otherwise the data is only in urb_listen (buffer_incoming).
 	 */
-- 
cgit v1.2.3


From 408fbc22ef1efb00dd896acd00e9f7d9b641e047 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Sun, 18 Sep 2016 07:31:43 -0400
Subject: net sched ife action: Introduce skb tcindex metadata encap decap

Sample use case of how this is encoded:
user space via tuntap (or a connected VM/Machine/container)
encodes the tcindex TLV.

Sample use case of decoding:
IFE action decodes it and the skb->tc_index is then used to classify.
So something like this for encoded ICMP packets:

.. first decode then reclassify... skb->tcindex will be set
sudo $TC filter add dev $ETH parent ffff: prio 2 protocol 0xbeef \
u32 match u32 0 0 flowid 1:1 \
action ife decode reclassify

...next match the decode icmp packet...
sudo $TC filter add dev $ETH parent ffff: prio 4 protocol ip \
u32 match ip protocol 1 0xff flowid 1:1 \
action continue

... last classify it using the tcindex classifier and do someaction..
sudo $TC filter add dev $ETH parent ffff: prio 5 protocol ip \
handle 0x11 tcindex classid 1:1 \
action blah..

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_ife.h |  3 +-
 net/sched/Kconfig                  |  5 +++
 net/sched/Makefile                 |  1 +
 net/sched/act_meta_skbtcindex.c    | 79 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 net/sched/act_meta_skbtcindex.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h
index 4ece02a77b9a..cd18360eca24 100644
--- a/include/uapi/linux/tc_act/tc_ife.h
+++ b/include/uapi/linux/tc_act/tc_ife.h
@@ -32,8 +32,9 @@ enum {
 #define IFE_META_HASHID 2
 #define	IFE_META_PRIO 3
 #define	IFE_META_QMAP 4
+#define	IFE_META_TCINDEX 5
 /*Can be overridden at runtime by module option*/
-#define	__IFE_META_MAX 5
+#define	__IFE_META_MAX 6
 #define IFE_META_MAX (__IFE_META_MAX - 1)
 
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7795d5a3f79a..87956a768d1b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -793,6 +793,11 @@ config NET_IFE_SKBPRIO
         depends on NET_ACT_IFE
         ---help---
 
+config NET_IFE_SKBTCINDEX
+        tristate "Support to encoding decoding skb tcindex on IFE action"
+        depends on NET_ACT_IFE
+        ---help---
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 148ae0d5ac2c..4bdda3634e0b 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
+obj-$(CONFIG_NET_IFE_SKBTCINDEX)	+= act_meta_skbtcindex.o
 obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
new file mode 100644
index 000000000000..3b35774ce890
--- /dev/null
+++ b/net/sched/act_meta_skbtcindex.c
@@ -0,0 +1,79 @@
+/*
+ * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * copyright Jamal Hadi Salim (2016)
+ *
+*/
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <uapi/linux/tc_act/tc_ife.h>
+#include <net/tc_act/tc_ife.h>
+#include <linux/rtnetlink.h>
+
+static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
+			     struct tcf_meta_info *e)
+{
+	u32 ifetc_index = skb->tc_index;
+
+	return ife_encode_meta_u16(ifetc_index, skbdata, e);
+}
+
+static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len)
+{
+	u16 ifetc_index = *(u16 *)data;
+
+	skb->tc_index = ntohs(ifetc_index);
+	return 0;
+}
+
+static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e)
+{
+	return ife_check_meta_u16(skb->tc_index, e);
+}
+
+static struct tcf_meta_ops ife_skbtcindex_ops = {
+	.metaid = IFE_META_TCINDEX,
+	.metatype = NLA_U16,
+	.name = "tc_index",
+	.synopsis = "skb tc_index 16 bit metadata",
+	.check_presence = skbtcindex_check,
+	.encode = skbtcindex_encode,
+	.decode = skbtcindex_decode,
+	.get = ife_get_meta_u16,
+	.alloc = ife_alloc_meta_u16,
+	.release = ife_release_meta_gen,
+	.validate = ife_validate_meta_u16,
+	.owner = THIS_MODULE,
+};
+
+static int __init ifetc_index_init_module(void)
+{
+	return register_ife_op(&ife_skbtcindex_ops);
+}
+
+static void __exit ifetc_index_cleanup_module(void)
+{
+	unregister_ife_op(&ife_skbtcindex_ops);
+}
+
+module_init(ifetc_index_init_module);
+module_exit(ifetc_index_cleanup_module);
+
+MODULE_AUTHOR("Jamal Hadi Salim(2016)");
+MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
-- 
cgit v1.2.3


From 823d1bc1082970fc02f8172859c789933ed84bc5 Mon Sep 17 00:00:00 2001
From: Emilio López <emilio.lopez@collabora.co.uk>
Date: Mon, 19 Sep 2016 01:21:20 -0300
Subject: dma-buf/sync_file: fix documentation error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ioctl name and description on the documentation block don't
match the ioctl being defined. This was probably overlooked while
renaming the ioctls during the sync file destaging. This patch
provides a more accurate description of what the ioctl actually does.

Signed-off-by: Emilio López <emilio.lopez@collabora.co.uk>
Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20160919042120.6280-1-emilio.lopez@collabora.co.uk
---
 include/uapi/linux/sync_file.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
index 413303d37b56..5b287d6970b3 100644
--- a/include/uapi/linux/sync_file.h
+++ b/include/uapi/linux/sync_file.h
@@ -85,15 +85,12 @@ struct sync_file_info {
 #define SYNC_IOC_MERGE		_IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
 
 /**
- * DOC: SYNC_IOC_FENCE_INFO - get detailed information on a fence
+ * DOC: SYNC_IOC_FILE_INFO - get detailed information on a sync_file
  *
- * Takes a struct sync_file_info_data with extra space allocated for pt_info.
- * Caller should write the size of the buffer into len.  On return, len is
- * updated to reflect the total size of the sync_file_info_data including
- * pt_info.
- *
- * pt_info is a buffer containing sync_pt_infos for every sync_pt in the fence.
- * To iterate over the sync_pt_infos, use the sync_pt_info.len field.
+ * Takes a struct sync_file_info. If num_fences is 0, the field is updated
+ * with the actual number of fences. If num_fences is > 0, the system will
+ * use the pointer provided on sync_fence_info to return up to num_fences of
+ * struct sync_fence_info, with detailed fence information.
  */
 #define SYNC_IOC_FILE_INFO	_IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info)
 
-- 
cgit v1.2.3


From 36bbef52c7eb646ed6247055a2acd3851e317857 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 20 Sep 2016 00:26:13 +0200
Subject: bpf: direct packet write and access for helpers for clsact progs

This work implements direct packet access for helpers and direct packet
write in a similar fashion as already available for XDP types via commits
4acf6c0b84c9 ("bpf: enable direct packet data write for xdp progs") and
6841de8b0d03 ("bpf: allow helpers access the packet directly"), and as a
complementary feature to the already available direct packet read for tc
(cls/act) programs.

For enabling this, we need to introduce two helpers, bpf_skb_pull_data()
and bpf_csum_update(). The first is generally needed for both, read and
write, because they would otherwise only be limited to the current linear
skb head. Usually, when the data_end test fails, programs just bail out,
or, in the direct read case, use bpf_skb_load_bytes() as an alternative
to overcome this limitation. If such data sits in non-linear parts, we
can just pull them in once with the new helper, retest and eventually
access them.

At the same time, this also makes sure the skb is uncloned, which is, of
course, a necessary condition for direct write. As this needs to be an
invariant for the write part only, the verifier detects writes and adds
a prologue that is calling bpf_skb_pull_data() to effectively unclone the
skb from the very beginning in case it is indeed cloned. The heuristic
makes use of a similar trick that was done in 233577a22089 ("net: filter:
constify detection of pkt_type_offset"). This comes at zero cost for other
programs that do not use the direct write feature. Should a program use
this feature only sparsely and has read access for the most parts with,
for example, drop return codes, then such write action can be delegated
to a tail called program for mitigating this cost of potential uncloning
to a late point in time where it would have been paid similarly with the
bpf_skb_store_bytes() as well. Advantage of direct write is that the
writes are inlined whereas the helper cannot make any length assumptions
and thus needs to generate a call to memcpy() also for small sizes, as well
as cost of helper call itself with sanity checks are avoided. Plus, when
direct read is already used, we don't need to cache or perform rechecks
on the data boundaries (due to verifier invalidating previous checks for
helpers that change skb->data), so more complex programs using rewrites
can benefit from switching to direct read plus write.

For direct packet access to helpers, we save the otherwise needed copy into
a temp struct sitting on stack memory when use-case allows. Both facilities
are enabled via may_access_direct_pkt_data() in verifier. For now, we limit
this to map helpers and csum_diff, and can successively enable other helpers
where we find it makes sense. Helpers that definitely cannot be allowed for
this are those part of bpf_helper_changes_skb_data() since they can change
underlying data, and those that write into memory as this could happen for
packet typed args when still cloned. bpf_csum_update() helper accommodates
for the fact that we need to fixup checksum_complete when using direct write
instead of bpf_skb_store_bytes(), meaning the programs can use available
helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(),
csum_block_add(), csum_block_sub() equivalents in eBPF together with the
new helper. A usage example will be provided for iproute2's examples/bpf/
directory.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   4 +-
 include/linux/skbuff.h   |  14 ++++-
 include/uapi/linux/bpf.h |  21 ++++++++
 kernel/bpf/helpers.c     |   3 ++
 kernel/bpf/verifier.c    |  54 ++++++++++++++-----
 net/core/filter.c        | 134 +++++++++++++++++++++++++++++++++++++++++------
 6 files changed, 196 insertions(+), 34 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9a904f63f8c1..5691fdc83819 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -96,6 +96,7 @@ enum bpf_return_type {
 struct bpf_func_proto {
 	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	bool gpl_only;
+	bool pkt_access;
 	enum bpf_return_type ret_type;
 	enum bpf_arg_type arg1_type;
 	enum bpf_arg_type arg2_type;
@@ -151,7 +152,8 @@ struct bpf_verifier_ops {
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
 				enum bpf_reg_type *reg_type);
-
+	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
+			    const struct bpf_prog *prog);
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn, struct bpf_prog *prog);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4c5662f05bda..c6dab3f7457c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -676,13 +676,23 @@ struct sk_buff {
 	 */
 	kmemcheck_bitfield_begin(flags1);
 	__u16			queue_mapping;
+
+/* if you move cloned around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define CLONED_MASK	(1 << 7)
+#else
+#define CLONED_MASK	1
+#endif
+#define CLONED_OFFSET()		offsetof(struct sk_buff, __cloned_offset)
+
+	__u8			__cloned_offset[0];
 	__u8			cloned:1,
 				nohdr:1,
 				fclone:2,
 				peeked:1,
 				head_frag:1,
-				xmit_more:1;
-	/* one bit hole */
+				xmit_more:1,
+				__unused:1; /* one bit hole */
 	kmemcheck_bitfield_end(flags1);
 
 	/* fields enclosed in headers_start/headers_end are copied
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f896dfac4ac0..e07432b9f8b8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -398,6 +398,27 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_change_tail,
 
+	/**
+	 * bpf_skb_pull_data(skb, len)
+	 * The helper will pull in non-linear data in case the
+	 * skb is non-linear and not all of len are part of the
+	 * linear section. Only needed for read/write with direct
+	 * packet access.
+	 * @skb: pointer to skb
+	 * @len: len to make read/writeable
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_pull_data,
+
+	/**
+	 * bpf_csum_update(skb, csum)
+	 * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+	 * @skb: pointer to skb
+	 * @csum: csum to add
+	 * Return: csum on success or negative error
+	 */
+	BPF_FUNC_csum_update,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a5b8bf8cfcfd..39918402e6e9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 	.func		= bpf_map_lookup_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
@@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
 const struct bpf_func_proto bpf_map_update_elem_proto = {
 	.func		= bpf_map_update_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
@@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.func		= bpf_map_delete_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bc138f34e38c..3a75ee3bdcd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -196,6 +196,7 @@ struct verifier_env {
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
+	bool seen_direct_write;
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	65536
@@ -204,6 +205,7 @@ struct verifier_env {
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
+	bool pkt_access;
 	int regno;
 	int access_size;
 };
@@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
 
 #define MAX_PACKET_OFF 0xffff
 
-static bool may_write_pkt_data(enum bpf_prog_type type)
+static bool may_access_direct_pkt_data(struct verifier_env *env,
+				       const struct bpf_call_arg_meta *meta)
 {
-	switch (type) {
+	switch (env->prog->type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
 	case BPF_PROG_TYPE_XDP:
+		if (meta)
+			return meta->pkt_access;
+
+		env->seen_direct_write = true;
 		return true;
 	default:
 		return false;
@@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			err = check_stack_read(state, off, size, value_regno);
 		}
 	} else if (state->regs[regno].type == PTR_TO_PACKET) {
-		if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
+		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
 			verbose("cannot write into packet\n");
 			return -EACCES;
 		}
@@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 		return 0;
 	}
 
-	if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
-		verbose("helper access to the packet is not allowed for clsact\n");
+	if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+		verbose("helper access to the packet is not allowed\n");
 		return -EACCES;
 	}
 
@@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id)
 	changes_data = bpf_helper_changes_skb_data(fn->func);
 
 	memset(&meta, 0, sizeof(meta));
+	meta.pkt_access = fn->pkt_access;
 
 	/* We only support one arg being in raw mode at the moment, which
 	 * is sufficient for the helper functions we have right now.
@@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
  */
 static int convert_ctx_accesses(struct verifier_env *env)
 {
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
-	struct bpf_insn insn_buf[16];
+	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i;
+	int i, insn_cnt, cnt;
 
-	if (!env->prog->aux->ops->convert_ctx_access)
+	if (ops->gen_prologue) {
+		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+					env->prog);
+		if (cnt >= ARRAY_SIZE(insn_buf)) {
+			verbose("bpf verifier is misconfigured\n");
+			return -EINVAL;
+		} else if (cnt) {
+			new_prog = bpf_patch_insn_single(env->prog, 0,
+							 insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+			env->prog = new_prog;
+		}
+	}
+
+	if (!ops->convert_ctx_access)
 		return 0;
 
+	insn_cnt = env->prog->len;
+	insn = env->prog->insnsi;
+
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		u32 insn_delta, cnt;
+		u32 insn_delta;
 
 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
@@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
 			continue;
 		}
 
-		cnt = env->prog->aux->ops->
-			convert_ctx_access(type, insn->dst_reg, insn->src_reg,
-					   insn->off, insn_buf, env->prog);
+		cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
+					      insn->off, insn_buf, env->prog);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index 298b146b47e7..0920c2ac1d00 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1362,6 +1362,11 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
 	return err;
 }
 
+static int bpf_try_make_head_writable(struct sk_buff *skb)
+{
+	return bpf_try_make_writable(skb, skb_headlen(skb));
+}
+
 static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
 {
 	if (skb_at_tc_ingress(skb))
@@ -1441,6 +1446,28 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
 	.arg4_type	= ARG_CONST_STACK_SIZE,
 };
 
+BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto bpf_skb_pull_data_proto = {
+	.func		= bpf_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
 	   u64, from, u64, to, u64, flags)
 {
@@ -1567,6 +1594,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
 static const struct bpf_func_proto bpf_csum_diff_proto = {
 	.func		= bpf_csum_diff,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_STACK,
 	.arg2_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
@@ -1575,6 +1603,26 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
+{
+	/* The interface is to be used in combination with bpf_csum_diff()
+	 * for direct packet writes. csum rotation for alignment as well
+	 * as emulating csum_sub() can be done from the eBPF program.
+	 */
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		return (skb->csum = csum_add(skb->csum, csum));
+
+	return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_csum_update_proto = {
+	.func		= bpf_csum_update,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	return dev_forward_skb(dev, skb);
@@ -1602,6 +1650,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
 	struct net_device *dev;
+	struct sk_buff *clone;
+	int ret;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return -EINVAL;
@@ -1610,14 +1660,25 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 	if (unlikely(!dev))
 		return -EINVAL;
 
-	skb = skb_clone(skb, GFP_ATOMIC);
-	if (unlikely(!skb))
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (unlikely(!clone))
 		return -ENOMEM;
 
-	bpf_push_mac_rcsum(skb);
+	/* For direct write, we need to keep the invariant that the skbs
+	 * we're dealing with need to be uncloned. Should uncloning fail
+	 * here, we need to free the just generated clone to unclone once
+	 * again.
+	 */
+	ret = bpf_try_make_head_writable(skb);
+	if (unlikely(ret)) {
+		kfree_skb(clone);
+		return -ENOMEM;
+	}
+
+	bpf_push_mac_rcsum(clone);
 
 	return flags & BPF_F_INGRESS ?
-	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+	       __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
 }
 
 static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -2063,19 +2124,14 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 
 bool bpf_helper_changes_skb_data(void *func)
 {
-	if (func == bpf_skb_vlan_push)
-		return true;
-	if (func == bpf_skb_vlan_pop)
-		return true;
-	if (func == bpf_skb_store_bytes)
-		return true;
-	if (func == bpf_skb_change_proto)
-		return true;
-	if (func == bpf_skb_change_tail)
-		return true;
-	if (func == bpf_l3_csum_replace)
-		return true;
-	if (func == bpf_l4_csum_replace)
+	if (func == bpf_skb_vlan_push ||
+	    func == bpf_skb_vlan_pop ||
+	    func == bpf_skb_store_bytes ||
+	    func == bpf_skb_change_proto ||
+	    func == bpf_skb_change_tail ||
+	    func == bpf_skb_pull_data ||
+	    func == bpf_l3_csum_replace ||
+	    func == bpf_l4_csum_replace)
 		return true;
 
 	return false;
@@ -2440,8 +2496,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_store_bytes_proto;
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_skb_pull_data:
+		return &bpf_skb_pull_data_proto;
 	case BPF_FUNC_csum_diff:
 		return &bpf_csum_diff_proto;
+	case BPF_FUNC_csum_update:
+		return &bpf_csum_update_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
@@ -2533,6 +2593,45 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return __is_valid_access(off, size, type);
 }
 
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+			       const struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	if (!direct_write)
+		return 0;
+
+	/* if (!skb->cloned)
+	 *       goto start;
+	 *
+	 * (Fast-path, otherwise approximation that we might be
+	 *  a clone, do the rest in helper.)
+	 */
+	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
+	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
+
+	/* ret = bpf_skb_pull_data(skb, 0); */
+	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
+	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			       BPF_FUNC_skb_pull_data);
+	/* if (!ret)
+	 *      goto restore;
+	 * return TC_ACT_SHOT;
+	 */
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
+	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+	*insn++ = BPF_EXIT_INSN();
+
+	/* restore: */
+	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+	/* start: */
+	*insn++ = prog->insnsi[0];
+
+	return insn - insn_buf;
+}
+
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
 				       enum bpf_reg_type *reg_type)
@@ -2810,6 +2909,7 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
 	.get_func_proto		= tc_cls_act_func_proto,
 	.is_valid_access	= tc_cls_act_is_valid_access,
 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
+	.gen_prologue		= tc_cls_act_prologue,
 };
 
 static const struct bpf_verifier_ops xdp_ops = {
-- 
cgit v1.2.3


From 77879147a3481babffd7e368d977ab682545a6bd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Sep 2016 23:39:11 -0400
Subject: net_sched: sch_fq: add low_rate_threshold parameter

This commit adds to the fq module a low_rate_threshold parameter to
insert a delay after all packets if the socket requests a pacing rate
below the threshold.

This helps achieve more precise control of the sending rate with
low-rate paths, especially policers. The basic issue is that if a
congestion control module detects a policer at a certain rate, it may
want fq to be able to shape to that policed rate. That way the sender
can avoid policer drops by having the packets arrive at the policer at
or just under the policed rate.

The default threshold of 550Kbps was chosen analytically so that for
policers or links at 500Kbps or 512Kbps fq would very likely invoke
this mechanism, even if the pacing rate was briefly slightly above the
available bandwidth. This value was then empirically validated with
two years of production testing on YouTube video servers.

Signed-off-by: Van Jacobson <vanj@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  2 ++
 net/sched/sch_fq.c             | 22 +++++++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 2382eed50278..f8e39dbaa781 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -792,6 +792,8 @@ enum {
 
 	TCA_FQ_ORPHAN_MASK,	/* mask applied to orphaned skb hashes */
 
+	TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
 	__TCA_FQ_MAX
 };
 
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index dc52cc10d6ed..5dd929cc1423 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -94,6 +94,7 @@ struct fq_sched_data {
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
 	u32		orphan_mask;	/* mask for orphaned skb */
+	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
 	u8		rate_enable;
 	u8		fq_trees_log;
@@ -433,7 +434,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
-	u32 rate;
+	u32 rate, plen;
 
 	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
@@ -482,7 +483,7 @@ begin:
 	prefetch(&skb->end);
 	f->credit -= qdisc_pkt_len(skb);
 
-	if (f->credit > 0 || !q->rate_enable)
+	if (!q->rate_enable)
 		goto out;
 
 	/* Do not pace locally generated ack packets */
@@ -493,8 +494,15 @@ begin:
 	if (skb->sk)
 		rate = min(skb->sk->sk_pacing_rate, rate);
 
+	if (rate <= q->low_rate_threshold) {
+		f->credit = 0;
+		plen = qdisc_pkt_len(skb);
+	} else {
+		plen = max(qdisc_pkt_len(skb), q->quantum);
+		if (f->credit > 0)
+			goto out;
+	}
 	if (rate != ~0U) {
-		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
 		u64 len = (u64)plen * NSEC_PER_SEC;
 
 		if (likely(rate))
@@ -662,6 +670,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
+	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +725,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_FQ_FLOW_MAX_RATE])
 		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
+		q->low_rate_threshold =
+			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+
 	if (tb[TCA_FQ_RATE_ENABLE]) {
 		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
@@ -781,6 +794,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->fq_root		= NULL;
 	q->fq_trees_log		= ilog2(1024);
 	q->orphan_mask		= 1024 - 1;
+	q->low_rate_threshold	= 550000 / 8;
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	if (opt)
@@ -811,6 +825,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
+			q->low_rate_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From eb8329e0a04db0061f714f033b4454326ba147f4 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 19 Sep 2016 23:39:16 -0400
Subject: tcp: export data delivery rate

This commit export two new fields in struct tcp_info:

  tcpi_delivery_rate: The most recent goodput, as measured by
    tcp_rate_gen(). If the socket is limited by the sending
    application (e.g., no data to send), it reports the highest
    measurement instead of the most recent. The unit is bytes per
    second (like other rate fields in tcp_info).

  tcpi_delivery_rate_app_limited: A boolean indicating if the goodput
    was measured when the socket's throughput was limited by the
    sending application.

This delivery rate information can be useful for applications that
want to know the current throughput the TCP connection is seeing,
e.g. adaptive bitrate video streaming. It can also be very useful for
debugging or troubleshooting.

Signed-off-by: Van Jacobson <vanj@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  5 ++++-
 include/uapi/linux/tcp.h |  3 +++
 net/ipv4/tcp.c           | 11 ++++++++++-
 net/ipv4/tcp_rate.c      | 12 +++++++++++-
 4 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fdcd00ffcb66..a17ae7b85218 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -213,7 +213,8 @@ struct tcp_sock {
 		u8 reord;    /* reordering detected */
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
-	u8	unused;
+	u8	rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
+		unused:7;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
@@ -271,6 +272,8 @@ struct tcp_sock {
 	u32	app_limited;	/* limited until "delivered" reaches this val */
 	struct skb_mstamp first_tx_mstamp;  /* start of window send phase */
 	struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
+	u32	rate_delivered;    /* saved rate sample: packets delivered */
+	u32	rate_interval_us;  /* saved rate sample: time elapsed */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 482898fc433a..73ac0db487f8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -167,6 +167,7 @@ struct tcp_info {
 	__u8	tcpi_backoff;
 	__u8	tcpi_options;
 	__u8	tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
+	__u8	tcpi_delivery_rate_app_limited:1;
 
 	__u32	tcpi_rto;
 	__u32	tcpi_ato;
@@ -211,6 +212,8 @@ struct tcp_info {
 	__u32	tcpi_min_rtt;
 	__u32	tcpi_data_segs_in;	/* RFC4898 tcpEStatsDataSegsIn */
 	__u32	tcpi_data_segs_out;	/* RFC4898 tcpEStatsDataSegsOut */
+
+	__u64   tcpi_delivery_rate;
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2250f891f931..f253e5019d22 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 now = tcp_time_stamp;
+	u32 now = tcp_time_stamp, intv;
 	unsigned int start;
 	int notsent_bytes;
 	u64 rate64;
@@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_min_rtt = tcp_min_rtt(tp);
 	info->tcpi_data_segs_in = tp->data_segs_in;
 	info->tcpi_data_segs_out = tp->data_segs_out;
+
+	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
+	rate = READ_ONCE(tp->rate_delivered);
+	intv = READ_ONCE(tp->rate_interval_us);
+	if (rate && intv) {
+		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
+		do_div(rate64, intv);
+		put_unaligned(rate64, &info->tcpi_delivery_rate);
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 52ff84be59ab..9be1581a5a08 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 	 * for connections suffer heavy or prolonged losses.
 	 */
 	if (unlikely(rs->interval_us < tcp_min_rtt(tp))) {
-		rs->interval_us = -1;
 		if (!rs->is_retrans)
 			pr_debug("tcp rate: %ld %d %u %u %u\n",
 				 rs->interval_us, rs->delivered,
 				 inet_csk(sk)->icsk_ca_state,
 				 tp->rx_opt.sack_ok, tcp_min_rtt(tp));
+		rs->interval_us = -1;
+		return;
+	}
+
+	/* Record the last non-app-limited or the highest app-limited bw */
+	if (!rs->is_app_limited ||
+	    ((u64)rs->delivered * tp->rate_interval_us >=
+	     (u64)tp->rate_delivered * rs->interval_us)) {
+		tp->rate_delivered = rs->delivered;
+		tp->rate_interval_us = rs->interval_us;
+		tp->rate_app_limited = rs->is_app_limited;
 	}
 }
 
-- 
cgit v1.2.3


From 0f8782ea14974ce992618b55f0c041ef43ed0b78 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 19 Sep 2016 23:39:23 -0400
Subject: tcp_bbr: add BBR congestion control

This commit implements a new TCP congestion control algorithm: BBR
(Bottleneck Bandwidth and RTT). A detailed description of BBR will be
published in ACM Queue, Vol. 14 No. 5, September-October 2016, as
"BBR: Congestion-Based Congestion Control".

BBR has significantly increased throughput and reduced latency for
connections on Google's internal backbone networks and google.com and
YouTube Web servers.

BBR requires only changes on the sender side, not in the network or
the receiver side. Thus it can be incrementally deployed on today's
Internet, or in datacenters.

The Internet has predominantly used loss-based congestion control
(largely Reno or CUBIC) since the 1980s, relying on packet loss as the
signal to slow down. While this worked well for many years, loss-based
congestion control is unfortunately out-dated in today's networks. On
today's Internet, loss-based congestion control causes the infamous
bufferbloat problem, often causing seconds of needless queuing delay,
since it fills the bloated buffers in many last-mile links. On today's
high-speed long-haul links using commodity switches with shallow
buffers, loss-based congestion control has abysmal throughput because
it over-reacts to losses caused by transient traffic bursts.

In 1981 Kleinrock and Gale showed that the optimal operating point for
a network maximizes delivered bandwidth while minimizing delay and
loss, not only for single connections but for the network as a
whole. Finding that optimal operating point has been elusive, since
any single network measurement is ambiguous: network measurements are
the result of both bandwidth and propagation delay, and those two
cannot be measured simultaneously.

While it is impossible to disambiguate any single bandwidth or RTT
measurement, a connection's behavior over time tells a clearer
story. BBR uses a measurement strategy designed to resolve this
ambiguity. It combines these measurements with a robust servo loop
using recent control systems advances to implement a distributed
congestion control algorithm that reacts to actual congestion, not
packet loss or transient queue delay, and is designed to converge with
high probability to a point near the optimal operating point.

In a nutshell, BBR creates an explicit model of the network pipe by
sequentially probing the bottleneck bandwidth and RTT. On the arrival
of each ACK, BBR derives the current delivery rate of the last round
trip, and feeds it through a windowed max-filter to estimate the
bottleneck bandwidth. Conversely it uses a windowed min-filter to
estimate the round trip propagation delay. The max-filtered bandwidth
and min-filtered RTT estimates form BBR's model of the network pipe.

Using its model, BBR sets control parameters to govern sending
behavior. The primary control is the pacing rate: BBR applies a gain
multiplier to transmit faster or slower than the observed bottleneck
bandwidth. The conventional congestion window (cwnd) is now the
secondary control; the cwnd is set to a small multiple of the
estimated BDP (bandwidth-delay product) in order to allow full
utilization and bandwidth probing while bounding the potential amount
of queue at the bottleneck.

When a BBR connection starts, it enters STARTUP mode and applies a
high gain to perform an exponential search to quickly probe the
bottleneck bandwidth (doubling its sending rate each round trip, like
slow start). However, instead of continuing until it fills up the
buffer (i.e. a loss), or until delay or ACK spacing reaches some
threshold (like Hystart), it uses its model of the pipe to estimate
when that pipe is full: it estimates the pipe is full when it notices
the estimated bandwidth has stopped growing. At that point it exits
STARTUP and enters DRAIN mode, where it reduces its pacing rate to
drain the queue it estimates it has created.

Then BBR enters steady state. In steady state, PROBE_BW mode cycles
between first pacing faster to probe for more bandwidth, then pacing
slower to drain any queue that created if no more bandwidth was
available, and then cruising at the estimated bandwidth to utilize the
pipe without creating excess queue. Occasionally, on an as-needed
basis, it sends significantly slower to probe for RTT (PROBE_RTT
mode).

BBR has been fully deployed on Google's wide-area backbone networks
and we're experimenting with BBR on Google.com and YouTube on a global
scale.  Replacing CUBIC with BBR has resulted in significant
improvements in network latency and application (RPC, browser, and
video) metrics. For more details please refer to our upcoming ACM
Queue publication.

Example performance results, to illustrate the difference between BBR
and CUBIC:

Resilience to random loss (e.g. from shallow buffers):
  Consider a netperf TCP_STREAM test lasting 30 secs on an emulated
  path with a 10Gbps bottleneck, 100ms RTT, and 1% packet loss
  rate. CUBIC gets 3.27 Mbps, and BBR gets 9150 Mbps (2798x higher).

Low latency with the bloated buffers common in today's last-mile links:
  Consider a netperf TCP_STREAM test lasting 120 secs on an emulated
  path with a 10Mbps bottleneck, 40ms RTT, and 1000-packet bottleneck
  buffer. Both fully utilize the bottleneck bandwidth, but BBR
  achieves this with a median RTT 25x lower (43 ms instead of 1.09
  secs).

Our long-term goal is to improve the congestion control algorithms
used on the Internet. We are hopeful that BBR can help advance the
efforts toward this goal, and motivate the community to do further
research.

Test results, performance evaluations, feedback, and BBR-related
discussions are very welcome in the public e-mail list for BBR:

  https://groups.google.com/forum/#!forum/bbr-dev

NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing
enabled, since pacing is integral to the BBR design and
implementation. BBR without pacing would not function properly, and
may incur unnecessary high packet loss rates.

Signed-off-by: Van Jacobson <vanj@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h |  13 +
 net/ipv4/Kconfig               |  18 +
 net/ipv4/Makefile              |   1 +
 net/ipv4/tcp_bbr.c             | 896 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 928 insertions(+)
 create mode 100644 net/ipv4/tcp_bbr.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index b5c366f87b3e..509cd961068d 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -124,6 +124,7 @@ enum {
 	INET_DIAG_PEERS,
 	INET_DIAG_PAD,
 	INET_DIAG_MARK,
+	INET_DIAG_BBRINFO,
 	__INET_DIAG_MAX,
 };
 
@@ -157,8 +158,20 @@ struct tcp_dctcp_info {
 	__u32	dctcp_ab_tot;
 };
 
+/* INET_DIAG_BBRINFO */
+
+struct tcp_bbr_info {
+	/* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */
+	__u32	bbr_bw_lo;		/* lower 32 bits of bw */
+	__u32	bbr_bw_hi;		/* upper 32 bits of bw */
+	__u32	bbr_min_rtt;		/* min-filtered RTT in uSec */
+	__u32	bbr_pacing_gain;	/* pacing gain shifted left 8 bits */
+	__u32	bbr_cwnd_gain;		/* cwnd gain shifted left 8 bits */
+};
+
 union tcp_cc_info {
 	struct tcpvegas_info	vegas;
 	struct tcp_dctcp_info	dctcp;
+	struct tcp_bbr_info	bbr;
 };
 #endif /* _UAPI_INET_DIAG_H_ */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 50d6a9b49f6c..300b06888fdf 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -640,6 +640,21 @@ config TCP_CONG_CDG
 	  D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
 	  delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
 
+config TCP_CONG_BBR
+	tristate "BBR TCP"
+	default n
+	---help---
+
+	BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
+	maximize network utilization and minimize queues. It builds an explicit
+	model of the the bottleneck delivery rate and path round-trip
+	propagation delay. It tolerates packet loss and delay unrelated to
+	congestion. It can operate over LAN, WAN, cellular, wifi, or cable
+	modem links. It can coexist with flows that use loss-based congestion
+	control, and can operate with shallow buffers, deep buffers,
+	bufferbloat, policers, or AQM schemes that do not provide a delay
+	signal. It requires the fq ("Fair Queue") pacing packet scheduler.
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
@@ -674,6 +689,9 @@ choice
 	config DEFAULT_CDG
 		bool "CDG" if TCP_CONG_CDG=y
 
+	config DEFAULT_BBR
+		bool "BBR" if TCP_CONG_BBR=y
+
 	config DEFAULT_RENO
 		bool "Reno"
 endchoice
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 9cfff1a0bf71..bc6a6c8b9bcd 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
+obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
 obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
new file mode 100644
index 000000000000..0ea66c2c9344
--- /dev/null
+++ b/net/ipv4/tcp_bbr.c
@@ -0,0 +1,896 @@
+/* Bottleneck Bandwidth and RTT (BBR) congestion control
+ *
+ * BBR congestion control computes the sending rate based on the delivery
+ * rate (throughput) estimated from ACKs. In a nutshell:
+ *
+ *   On each ACK, update our model of the network path:
+ *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
+ *      min_rtt = windowed_min(rtt, 10 seconds)
+ *   pacing_rate = pacing_gain * bottleneck_bandwidth
+ *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
+ *
+ * The core algorithm does not react directly to packet losses or delays,
+ * although BBR may adjust the size of next send per ACK when loss is
+ * observed, or adjust the sending rate if it estimates there is a
+ * traffic policer, in order to keep the drop rate reasonable.
+ *
+ * BBR is described in detail in:
+ *   "BBR: Congestion-Based Congestion Control",
+ *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
+ *   Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
+ *
+ * There is a public e-mail list for discussing BBR development and testing:
+ *   https://groups.google.com/forum/#!forum/bbr-dev
+ *
+ * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * since pacing is integral to the BBR design and implementation.
+ * BBR without pacing would not function properly, and may incur unnecessary
+ * high packet loss rates.
+ */
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+#include <linux/inet.h>
+#include <linux/random.h>
+#include <linux/win_minmax.h>
+
+/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
+ * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
+ * Since the minimum window is >=4 packets, the lower bound isn't
+ * an issue. The upper bound isn't an issue with existing technologies.
+ */
+#define BW_SCALE 24
+#define BW_UNIT (1 << BW_SCALE)
+
+#define BBR_SCALE 8	/* scaling factor for fractions in BBR (e.g. gains) */
+#define BBR_UNIT (1 << BBR_SCALE)
+
+/* BBR has the following modes for deciding how fast to send: */
+enum bbr_mode {
+	BBR_STARTUP,	/* ramp up sending rate rapidly to fill pipe */
+	BBR_DRAIN,	/* drain any queue created during startup */
+	BBR_PROBE_BW,	/* discover, share bw: pace around estimated bw */
+	BBR_PROBE_RTT,	/* cut cwnd to min to probe min_rtt */
+};
+
+/* BBR congestion control block */
+struct bbr {
+	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
+	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
+	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
+	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
+	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
+	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
+	struct skb_mstamp cycle_mstamp;  /* time of this cycle phase start */
+	u32     mode:3,		     /* current bbr_mode in state machine */
+		prev_ca_state:3,     /* CA state on previous ACK */
+		packet_conservation:1,  /* use packet conservation? */
+		restore_cwnd:1,	     /* decided to revert cwnd to old value */
+		round_start:1,	     /* start of packet-timed tx->ack round? */
+		tso_segs_goal:7,     /* segments we want in each skb we send */
+		idle_restart:1,	     /* restarting after idle? */
+		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
+		unused:5,
+		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
+		lt_rtt_cnt:7,	     /* round trips in long-term interval */
+		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
+	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
+	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
+	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
+	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
+	u32	pacing_gain:10,	/* current gain for setting pacing rate */
+		cwnd_gain:10,	/* current gain for setting cwnd */
+		full_bw_cnt:3,	/* number of rounds without large bw gains */
+		cycle_idx:3,	/* current index in pacing_gain cycle array */
+		unused_b:6;
+	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
+	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+};
+
+#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
+
+/* Window length of bw filter (in rounds): */
+static const int bbr_bw_rtts = CYCLE_LEN + 2;
+/* Window length of min_rtt filter (in sec): */
+static const u32 bbr_min_rtt_win_sec = 10;
+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
+static const u32 bbr_probe_rtt_mode_ms = 200;
+/* Skip TSO below the following bandwidth (bits/sec): */
+static const int bbr_min_tso_rate = 1200000;
+
+/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
+ * that will allow a smoothly increasing pacing rate that will double each RTT
+ * and send the same number of packets per RTT that an un-paced, slow-starting
+ * Reno or CUBIC flow would:
+ */
+static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
+/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
+ * the queue created in BBR_STARTUP in a single round:
+ */
+static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
+/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
+static const int bbr_cwnd_gain  = BBR_UNIT * 2;
+/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
+static const int bbr_pacing_gain[] = {
+	BBR_UNIT * 5 / 4,	/* probe for more available bw */
+	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
+	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
+	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
+};
+/* Randomize the starting gain cycling phase over N phases: */
+static const u32 bbr_cycle_rand = 7;
+
+/* Try to keep at least this many packets in flight, if things go smoothly. For
+ * smooth functioning, a sliding window protocol ACKing every other packet
+ * needs at least 4 packets in flight:
+ */
+static const u32 bbr_cwnd_min_target = 4;
+
+/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
+/* If bw has increased significantly (1.25x), there may be more bw available: */
+static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
+/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
+static const u32 bbr_full_bw_cnt = 3;
+
+/* "long-term" ("LT") bandwidth estimator parameters... */
+/* The minimum number of rounds in an LT bw sampling interval: */
+static const u32 bbr_lt_intvl_min_rtts = 4;
+/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
+static const u32 bbr_lt_loss_thresh = 50;
+/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
+static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
+/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
+static const u32 bbr_lt_bw_diff = 4000 / 8;
+/* If we estimate we're policed, use lt_bw for this many round trips: */
+static const u32 bbr_lt_bw_max_rtts = 48;
+
+/* Do we estimate that STARTUP filled the pipe? */
+static bool bbr_full_bw_reached(const struct sock *sk)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+}
+
+/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
+static u32 bbr_max_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return minmax_get(&bbr->bw);
+}
+
+/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
+static u32 bbr_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
+}
+
+/* Return rate in bytes per second, optionally with a gain.
+ * The order here is chosen carefully to avoid overflow of u64. This should
+ * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
+ */
+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
+{
+	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+	rate *= gain;
+	rate >>= BBR_SCALE;
+	rate *= USEC_PER_SEC;
+	return rate >> BW_SCALE;
+}
+
+/* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+ * estimated bandwidth. This is an important aspect of the design. In this
+ * implementation this slightly lower pacing rate is achieved implicitly by not
+ * including link-layer headers in the packet size used for the pacing rate.
+ */
+static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 rate = bw;
+
+	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+	if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+		sk->sk_pacing_rate = rate;
+}
+
+/* Return count of segments we want in the skbs we send, or 0 for default. */
+static u32 bbr_tso_segs_goal(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->tso_segs_goal;
+}
+
+static void bbr_set_tso_segs_goal(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 min_segs;
+
+	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
+	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
+				 0x7FU);
+}
+
+/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
+static void bbr_save_cwnd(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
+		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
+	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
+		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+}
+
+static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (event == CA_EVENT_TX_START && tp->app_limited) {
+		bbr->idle_restart = 1;
+		/* Avoid pointless buffer overflows: pace at est. bw if we don't
+		 * need more speed (we're restarting from idle and app-limited).
+		 */
+		if (bbr->mode == BBR_PROBE_BW)
+			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
+	}
+}
+
+/* Find target cwnd. Right-size the cwnd based on min RTT and the
+ * estimated bottleneck bandwidth:
+ *
+ * cwnd = bw * min_rtt * gain = BDP * gain
+ *
+ * The key factor, gain, controls the amount of queue. While a small gain
+ * builds a smaller queue, it becomes more vulnerable to noise in RTT
+ * measurements (e.g., delayed ACKs or other ACK compression effects). This
+ * noise may cause BBR to under-estimate the rate.
+ *
+ * To achieve full performance in high-speed paths, we budget enough cwnd to
+ * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
+ *   - one skb in sending host Qdisc,
+ *   - one skb in sending host TSO/GSO engine
+ *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
+ * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
+ * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
+ * which allows 2 outstanding 2-packet sequences, to try to keep pipe
+ * full even with ACK-every-other-packet delayed ACKs.
+ */
+static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cwnd;
+	u64 w;
+
+	/* If we've never had a valid RTT sample, cap cwnd at the initial
+	 * default. This should only happen when the connection is not using TCP
+	 * timestamps and has retransmitted all of the SYN/SYNACK/data packets
+	 * ACKed so far. In this case, an RTO can cut cwnd to 1, in which
+	 * case we need to slow-start up toward something safe: TCP_INIT_CWND.
+	 */
+	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
+		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/
+
+	w = (u64)bw * bbr->min_rtt_us;
+
+	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
+	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
+
+	/* Allow enough full-sized skbs in flight to utilize end systems. */
+	cwnd += 3 * bbr->tso_segs_goal;
+
+	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
+	cwnd = (cwnd + 1) & ~1U;
+
+	return cwnd;
+}
+
+/* An optimization in BBR to reduce losses: On the first round of recovery, we
+ * follow the packet conservation principle: send P packets per P packets acked.
+ * After that, we slow-start and send at most 2*P packets per P packets acked.
+ * After recovery finishes, or upon undo, we restore the cwnd we had when
+ * recovery started (capped by the target cwnd based on estimated BDP).
+ *
+ * TODO(ycheng/ncardwell): implement a rate-based approach.
+ */
+static bool bbr_set_cwnd_to_recover_or_restore(
+	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
+	u32 cwnd = tp->snd_cwnd;
+
+	/* An ACK for P pkts should release at most 2*P packets. We do this
+	 * in two steps. First, here we deduct the number of lost packets.
+	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
+	 */
+	if (rs->losses > 0)
+		cwnd = max_t(s32, cwnd - rs->losses, 1);
+
+	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
+		/* Starting 1st round of Recovery, so do packet conservation. */
+		bbr->packet_conservation = 1;
+		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
+		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
+		cwnd = tcp_packets_in_flight(tp) + acked;
+	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
+		/* Exiting loss recovery; restore cwnd saved before recovery. */
+		bbr->restore_cwnd = 1;
+		bbr->packet_conservation = 0;
+	}
+	bbr->prev_ca_state = state;
+
+	if (bbr->restore_cwnd) {
+		/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
+		cwnd = max(cwnd, bbr->prior_cwnd);
+		bbr->restore_cwnd = 0;
+	}
+
+	if (bbr->packet_conservation) {
+		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
+		return true;	/* yes, using packet conservation */
+	}
+	*new_cwnd = cwnd;
+	return false;
+}
+
+/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
+ * has drawn us down below target), or snap down to target if we're above it.
+ */
+static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+			 u32 acked, u32 bw, int gain)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cwnd = 0, target_cwnd = 0;
+
+	if (!acked)
+		return;
+
+	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
+		goto done;
+
+	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
+	target_cwnd = bbr_target_cwnd(sk, bw, gain);
+	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
+		cwnd = min(cwnd + acked, target_cwnd);
+	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
+		cwnd = cwnd + acked;
+	cwnd = max(cwnd, bbr_cwnd_min_target);
+
+done:
+	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
+	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
+		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+}
+
+/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
+static bool bbr_is_next_cycle_phase(struct sock *sk,
+				    const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool is_full_length =
+		skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+		bbr->min_rtt_us;
+	u32 inflight, bw;
+
+	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
+	 * use the pipe without increasing the queue.
+	 */
+	if (bbr->pacing_gain == BBR_UNIT)
+		return is_full_length;		/* just use wall clock time */
+
+	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	bw = bbr_max_bw(sk);
+
+	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
+	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
+	 * small (e.g. on a LAN). We do not persist if packets are lost, since
+	 * a path with small buffers may not hold that much.
+	 */
+	if (bbr->pacing_gain > BBR_UNIT)
+		return is_full_length &&
+			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
+			 inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
+
+	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
+	 * probing didn't find more bw. If inflight falls to match BDP then we
+	 * estimate queue is drained; persisting would underutilize the pipe.
+	 */
+	return is_full_length ||
+		inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
+}
+
+static void bbr_advance_cycle_phase(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
+	bbr->cycle_mstamp = tp->delivered_mstamp;
+	bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
+}
+
+/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
+static void bbr_update_cycle_phase(struct sock *sk,
+				   const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
+	    bbr_is_next_cycle_phase(sk, rs))
+		bbr_advance_cycle_phase(sk);
+}
+
+static void bbr_reset_startup_mode(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->mode = BBR_STARTUP;
+	bbr->pacing_gain = bbr_high_gain;
+	bbr->cwnd_gain	 = bbr_high_gain;
+}
+
+static void bbr_reset_probe_bw_mode(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->mode = BBR_PROBE_BW;
+	bbr->pacing_gain = BBR_UNIT;
+	bbr->cwnd_gain = bbr_cwnd_gain;
+	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
+	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
+}
+
+static void bbr_reset_mode(struct sock *sk)
+{
+	if (!bbr_full_bw_reached(sk))
+		bbr_reset_startup_mode(sk);
+	else
+		bbr_reset_probe_bw_mode(sk);
+}
+
+/* Start a new long-term sampling interval. */
+static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+	bbr->lt_last_delivered = tp->delivered;
+	bbr->lt_last_lost = tp->lost;
+	bbr->lt_rtt_cnt = 0;
+}
+
+/* Completely reset long-term bandwidth sampling. */
+static void bbr_reset_lt_bw_sampling(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->lt_bw = 0;
+	bbr->lt_use_bw = 0;
+	bbr->lt_is_sampling = false;
+	bbr_reset_lt_bw_sampling_interval(sk);
+}
+
+/* Long-term bw sampling interval is done. Estimate whether we're policed. */
+static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 diff;
+
+	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
+		/* Is new bw close to the lt_bw from the previous interval? */
+		diff = abs(bw - bbr->lt_bw);
+		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
+		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
+		     bbr_lt_bw_diff)) {
+			/* All criteria are met; estimate we're policed. */
+			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
+			bbr->lt_use_bw = 1;
+			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
+			bbr->lt_rtt_cnt = 0;
+			return;
+		}
+	}
+	bbr->lt_bw = bw;
+	bbr_reset_lt_bw_sampling_interval(sk);
+}
+
+/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
+ * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
+ * explicitly models their policed rate, to reduce unnecessary losses. We
+ * estimate that we're policed if we see 2 consecutive sampling intervals with
+ * consistent throughput and high packet loss. If we think we're being policed,
+ * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
+ */
+static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 lost, delivered;
+	u64 bw;
+	s32 t;
+
+	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
+		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
+		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
+			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
+			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
+		}
+		return;
+	}
+
+	/* Wait for the first loss before sampling, to let the policer exhaust
+	 * its tokens and estimate the steady-state rate allowed by the policer.
+	 * Starting samples earlier includes bursts that over-estimate the bw.
+	 */
+	if (!bbr->lt_is_sampling) {
+		if (!rs->losses)
+			return;
+		bbr_reset_lt_bw_sampling_interval(sk);
+		bbr->lt_is_sampling = true;
+	}
+
+	/* To avoid underestimates, reset sampling if we run out of data. */
+	if (rs->is_app_limited) {
+		bbr_reset_lt_bw_sampling(sk);
+		return;
+	}
+
+	if (bbr->round_start)
+		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
+	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
+		return;		/* sampling interval needs to be longer */
+	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
+		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
+		return;
+	}
+
+	/* End sampling interval when a packet is lost, so we estimate the
+	 * policer tokens were exhausted. Stopping the sampling before the
+	 * tokens are exhausted under-estimates the policed rate.
+	 */
+	if (!rs->losses)
+		return;
+
+	/* Calculate packets lost and delivered in sampling interval. */
+	lost = tp->lost - bbr->lt_last_lost;
+	delivered = tp->delivered - bbr->lt_last_delivered;
+	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
+	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
+		return;
+
+	/* Find average delivery rate in this sampling interval. */
+	t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
+	if (t < 1)
+		return;		/* interval is less than one jiffy, so wait */
+	t = jiffies_to_usecs(t);
+	/* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
+	if (t < 1) {
+		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
+		return;
+	}
+	bw = (u64)delivered * BW_UNIT;
+	do_div(bw, t);
+	bbr_lt_bw_interval_done(sk, bw);
+}
+
+/* Estimate the bandwidth based on how fast packets are delivered */
+static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+
+	bbr->round_start = 0;
+	if (rs->delivered < 0 || rs->interval_us <= 0)
+		return; /* Not a valid observation */
+
+	/* See if we've reached the next RTT */
+	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
+		bbr->next_rtt_delivered = tp->delivered;
+		bbr->rtt_cnt++;
+		bbr->round_start = 1;
+		bbr->packet_conservation = 0;
+	}
+
+	bbr_lt_bw_sampling(sk, rs);
+
+	/* Divide delivered by the interval to find a (lower bound) bottleneck
+	 * bandwidth sample. Delivered is in packets and interval_us in uS and
+	 * ratio will be <<1 for most connections. So delivered is first scaled.
+	 */
+	bw = (u64)rs->delivered * BW_UNIT;
+	do_div(bw, rs->interval_us);
+
+	/* If this sample is application-limited, it is likely to have a very
+	 * low delivered count that represents application behavior rather than
+	 * the available network rate. Such a sample could drag down estimated
+	 * bw, causing needless slow-down. Thus, to continue to send at the
+	 * last measured network rate, we filter out app-limited samples unless
+	 * they describe the path bw at least as well as our bw model.
+	 *
+	 * So the goal during app-limited phase is to proceed with the best
+	 * network rate no matter how long. We automatically leave this
+	 * phase when app writes faster than the network can deliver :)
+	 */
+	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
+		/* Incorporate new sample into our max bw filter. */
+		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
+	}
+}
+
+/* Estimate when the pipe is full, using the change in delivery rate: BBR
+ * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
+ * higher rwin, 3: we get higher delivery rate samples. Or transient
+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
+ */
+static void bbr_check_full_bw_reached(struct sock *sk,
+				      const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 bw_thresh;
+
+	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
+		return;
+
+	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
+	if (bbr_max_bw(sk) >= bw_thresh) {
+		bbr->full_bw = bbr_max_bw(sk);
+		bbr->full_bw_cnt = 0;
+		return;
+	}
+	++bbr->full_bw_cnt;
+}
+
+/* If pipe is probably full, drain the queue and then enter steady-state. */
+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
+		bbr->mode = BBR_DRAIN;	/* drain queue we created */
+		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
+		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
+	}	/* fall through to check if in-flight is already small: */
+	if (bbr->mode == BBR_DRAIN &&
+	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
+		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
+}
+
+/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
+ * periodically drain the bottleneck queue, to converge to measure the true
+ * min_rtt (unloaded propagation delay). This allows the flows to keep queues
+ * small (reducing queuing delay and packet loss) and achieve fairness among
+ * BBR flows.
+ *
+ * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
+ * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
+ * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
+ * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
+ * re-enter the previous mode. BBR uses 200ms to approximately bound the
+ * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
+ *
+ * Note that flows need only pay 2% if they are busy sending over the last 10
+ * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
+ * natural silences or low-rate periods within 10 seconds where the rate is low
+ * enough for long enough to drain its queue in the bottleneck. We pick up
+ * these min RTT measurements opportunistically with our min_rtt filter. :-)
+ */
+static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool filter_expired;
+
+	/* Track min RTT seen in the min_rtt_win_sec filter window: */
+	filter_expired = after(tcp_time_stamp,
+			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
+	if (rs->rtt_us >= 0 &&
+	    (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
+		bbr->min_rtt_us = rs->rtt_us;
+		bbr->min_rtt_stamp = tcp_time_stamp;
+	}
+
+	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
+	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
+		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain = BBR_UNIT;
+		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
+		bbr->probe_rtt_done_stamp = 0;
+	}
+
+	if (bbr->mode == BBR_PROBE_RTT) {
+		/* Ignore low rate samples during this mode. */
+		tp->app_limited =
+			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
+		/* Maintain min packets in flight for max(200 ms, 1 round). */
+		if (!bbr->probe_rtt_done_stamp &&
+		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
+			bbr->probe_rtt_done_stamp = tcp_time_stamp +
+				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
+			bbr->probe_rtt_round_done = 0;
+			bbr->next_rtt_delivered = tp->delivered;
+		} else if (bbr->probe_rtt_done_stamp) {
+			if (bbr->round_start)
+				bbr->probe_rtt_round_done = 1;
+			if (bbr->probe_rtt_round_done &&
+			    after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
+				bbr->min_rtt_stamp = tcp_time_stamp;
+				bbr->restore_cwnd = 1;  /* snap to prior_cwnd */
+				bbr_reset_mode(sk);
+			}
+		}
+	}
+	bbr->idle_restart = 0;
+}
+
+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
+{
+	bbr_update_bw(sk, rs);
+	bbr_update_cycle_phase(sk, rs);
+	bbr_check_full_bw_reached(sk, rs);
+	bbr_check_drain(sk, rs);
+	bbr_update_min_rtt(sk, rs);
+}
+
+static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 bw;
+
+	bbr_update_model(sk, rs);
+
+	bw = bbr_bw(sk);
+	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+	bbr_set_tso_segs_goal(sk);
+	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
+}
+
+static void bbr_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+
+	bbr->prior_cwnd = 0;
+	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
+	bbr->rtt_cnt = 0;
+	bbr->next_rtt_delivered = 0;
+	bbr->prev_ca_state = TCP_CA_Open;
+	bbr->packet_conservation = 0;
+
+	bbr->probe_rtt_done_stamp = 0;
+	bbr->probe_rtt_round_done = 0;
+	bbr->min_rtt_us = tcp_min_rtt(tp);
+	bbr->min_rtt_stamp = tcp_time_stamp;
+
+	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
+
+	/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
+	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
+	bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+
+	bbr->restore_cwnd = 0;
+	bbr->round_start = 0;
+	bbr->idle_restart = 0;
+	bbr->full_bw = 0;
+	bbr->full_bw_cnt = 0;
+	bbr->cycle_mstamp.v64 = 0;
+	bbr->cycle_idx = 0;
+	bbr_reset_lt_bw_sampling(sk);
+	bbr_reset_startup_mode(sk);
+}
+
+static u32 bbr_sndbuf_expand(struct sock *sk)
+{
+	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
+	return 3;
+}
+
+/* In theory BBR does not need to undo the cwnd since it does not
+ * always reduce cwnd on losses (see bbr_main()). Keep it for now.
+ */
+static u32 bbr_undo_cwnd(struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;
+}
+
+/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
+static u32 bbr_ssthresh(struct sock *sk)
+{
+	bbr_save_cwnd(sk);
+	return TCP_INFINITE_SSTHRESH;	 /* BBR does not use ssthresh */
+}
+
+static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
+			   union tcp_cc_info *info)
+{
+	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
+	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct bbr *bbr = inet_csk_ca(sk);
+		u64 bw = bbr_bw(sk);
+
+		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
+		memset(&info->bbr, 0, sizeof(info->bbr));
+		info->bbr.bbr_bw_lo		= (u32)bw;
+		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
+		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
+		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
+		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
+		*attr = INET_DIAG_BBRINFO;
+		return sizeof(info->bbr);
+	}
+	return 0;
+}
+
+static void bbr_set_state(struct sock *sk, u8 new_state)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (new_state == TCP_CA_Loss) {
+		struct rate_sample rs = { .losses = 1 };
+
+		bbr->prev_ca_state = TCP_CA_Loss;
+		bbr->full_bw = 0;
+		bbr->round_start = 1;	/* treat RTO like end of a round */
+		bbr_lt_bw_sampling(sk, &rs);
+	}
+}
+
+static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
+	.flags		= TCP_CONG_NON_RESTRICTED,
+	.name		= "bbr",
+	.owner		= THIS_MODULE,
+	.init		= bbr_init,
+	.cong_control	= bbr_main,
+	.sndbuf_expand	= bbr_sndbuf_expand,
+	.undo_cwnd	= bbr_undo_cwnd,
+	.cwnd_event	= bbr_cwnd_event,
+	.ssthresh	= bbr_ssthresh,
+	.tso_segs_goal	= bbr_tso_segs_goal,
+	.get_info	= bbr_get_info,
+	.set_state	= bbr_set_state,
+};
+
+static int __init bbr_register(void)
+{
+	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
+}
+
+static void __exit bbr_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
+}
+
+module_init(bbr_register);
+module_exit(bbr_unregister);
+
+MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
+MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
+MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
+MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
-- 
cgit v1.2.3


From 0d01d45f1b251448590c710baa32f722e43c62c7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Sep 2016 11:43:54 +0100
Subject: net: cls_bpf: limit hardware offload by software-only flag

Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_HW flag.
Unlike U32 and flower cls_bpf already has some netlink
flags defined.  Create a new attribute to be able to use
the same flag values as the above.

Unlike U32 and flower reject unknown flags.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h        |  1 +
 include/uapi/linux/pkt_cls.h |  1 +
 net/sched/cls_bpf.c          | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 41e8071dff87..57af9f3032ff 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -498,6 +498,7 @@ struct tc_cls_bpf_offload {
 	struct bpf_prog *prog;
 	const char *name;
 	bool exts_integrated;
+	u32 gen_flags;
 };
 
 #endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 8915b61bbf83..8fd715f806a2 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -396,6 +396,7 @@ enum {
 	TCA_BPF_FD,
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
+	TCA_BPF_FLAGS_GEN,
 	__TCA_BPF_MAX,
 };
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6523c5b4c0a5..ebf01f7c1470 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
 MODULE_DESCRIPTION("TC BPF based classifier");
 
 #define CLS_BPF_NAME_LEN	256
+#define CLS_BPF_SUPPORTED_GEN_FLAGS		\
+	TCA_CLS_FLAGS_SKIP_HW
 
 struct cls_bpf_head {
 	struct list_head plist;
@@ -40,6 +42,7 @@ struct cls_bpf_prog {
 	struct tcf_result res;
 	bool exts_integrated;
 	bool offloaded;
+	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
 	union {
@@ -55,6 +58,7 @@ struct cls_bpf_prog {
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
 	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
+	[TCA_BPF_FLAGS_GEN]	= { .type = NLA_U32 },
 	[TCA_BPF_FD]		= { .type = NLA_U32 },
 	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING,
 				    .len = CLS_BPF_NAME_LEN },
@@ -154,6 +158,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	bpf_offload.prog = prog->filter;
 	bpf_offload.name = prog->bpf_name;
 	bpf_offload.exts_integrated = prog->exts_integrated;
+	bpf_offload.gen_flags = prog->gen_flags;
 
 	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
 					     tp->protocol, &offload);
@@ -167,14 +172,14 @@ static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	enum tc_clsbpf_command cmd;
 
 	if (oldprog && oldprog->offloaded) {
-		if (tc_should_offload(dev, tp, 0)) {
+		if (tc_should_offload(dev, tp, prog->gen_flags)) {
 			cmd = TC_CLSBPF_REPLACE;
 		} else {
 			obj = oldprog;
 			cmd = TC_CLSBPF_DESTROY;
 		}
 	} else {
-		if (!tc_should_offload(dev, tp, 0))
+		if (!tc_should_offload(dev, tp, prog->gen_flags))
 			return;
 		cmd = TC_CLSBPF_ADD;
 	}
@@ -370,6 +375,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 {
 	bool is_bpf, is_ebpf, have_exts = false;
 	struct tcf_exts exts;
+	u32 gen_flags = 0;
 	int ret;
 
 	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@@ -394,8 +400,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 
 		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
 	}
+	if (tb[TCA_BPF_FLAGS_GEN]) {
+		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+		    !tc_flags_valid(gen_flags)) {
+			ret = -EINVAL;
+			goto errout;
+		}
+	}
 
 	prog->exts_integrated = have_exts;
+	prog->gen_flags = gen_flags;
 
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
 		       cls_bpf_prog_from_efd(tb, prog, tp);
@@ -568,6 +583,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
 	if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
 		goto nla_put_failure;
+	if (prog->gen_flags &&
+	    nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
+		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-- 
cgit v1.2.3


From 45a497f2d149a4a8061c61518a79d59f1f3034b2 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Mon, 19 Sep 2016 19:11:10 +0300
Subject: net/sched: act_vlan: Introduce TCA_VLAN_ACT_MODIFY vlan action

TCA_VLAN_ACT_MODIFY allows one to change an existing tag.

It accepts same attributes as TCA_VLAN_ACT_PUSH (protocol, id,
priority).
If packet is vlan tagged, then the tag gets overwritten according to
user specified attributes.

For example, this allows user to replace a tag's vid while preserving
its priority bits (as opposed to "action vlan pop pipe action vlan push").

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_vlan.h |  1 +
 net/sched/act_vlan.c                | 29 ++++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index be72b6e3843b..bddb272b843f 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,7 @@
 
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
+#define TCA_VLAN_ACT_MODIFY	3
 
 struct tc_vlan {
 	tc_gen;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 59a8d3150ae2..a95c00b119da 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_vlan *v = to_vlan(a);
 	int action;
 	int err;
+	u16 tci;
 
 	spin_lock(&v->tcf_lock);
 	tcf_lastuse_update(&v->tcf_tm);
@@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 		if (err)
 			goto drop;
 		break;
+	case TCA_VLAN_ACT_MODIFY:
+		/* No-op if no vlan tag (either hw-accel or in-payload) */
+		if (!skb_vlan_tagged(skb))
+			goto unlock;
+		/* extract existing tag (and guarantee no hw-accel tag) */
+		if (skb_vlan_tag_present(skb)) {
+			tci = skb_vlan_tag_get(skb);
+			skb->vlan_tci = 0;
+		} else {
+			/* in-payload vlan tag, pop it */
+			err = __skb_vlan_pop(skb, &tci);
+			if (err)
+				goto drop;
+		}
+		/* replace the vid */
+		tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+		/* replace prio bits, if tcfv_push_prio specified */
+		if (v->tcfv_push_prio) {
+			tci &= ~VLAN_PRIO_MASK;
+			tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+		}
+		/* put updated tci as hwaccel tag */
+		__vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+		break;
 	default:
 		BUG();
 	}
@@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	case TCA_VLAN_ACT_POP:
 		break;
 	case TCA_VLAN_ACT_PUSH:
+	case TCA_VLAN_ACT_MODIFY:
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_hash_release(*a, bind);
@@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
+	if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
+	     v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
 	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
 			  v->tcfv_push_proto) ||
-- 
cgit v1.2.3


From 2b03bf732488a3c2e920afe22c03b82cb8477e28 Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Tue, 13 Sep 2016 13:49:53 +0200
Subject: netfilter: nft_numgen: add number generation offset

Add support of an offset value for incremental counter and random. With
this option the sysadmin is able to start the counter to a certain value
and then apply the generated number.

Example:

	meta mark set numgen inc mod 2 offset 100

This will generate marks with the serie 100, 101, 100, 101, ...

Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_numgen.c               | 32 ++++++++++++++++++++++++++------
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index bc0eb6a1066d..bcfb892ff148 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1136,12 +1136,14 @@ enum nft_trace_types {
  * @NFTA_NG_DREG: destination register (NLA_U32)
  * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
  * @NFTA_NG_TYPE: operation type (NLA_U32)
+ * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
  */
 enum nft_ng_attributes {
 	NFTA_NG_UNSPEC,
 	NFTA_NG_DREG,
 	NFTA_NG_MODULUS,
 	NFTA_NG_TYPE,
+	NFTA_NG_OFFSET,
 	__NFTA_NG_MAX
 };
 #define NFTA_NG_MAX	(__NFTA_NG_MAX - 1)
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index f173ebec30a7..55bc5ab78d4a 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -23,6 +23,7 @@ struct nft_ng_inc {
 	enum nft_registers      dreg:8;
 	u32			modulus;
 	atomic_t		counter;
+	u32			offset;
 };
 
 static void nft_ng_inc_eval(const struct nft_expr *expr,
@@ -37,13 +38,14 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
 		nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
 	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
 
-	regs->data[priv->dreg] = nval;
+	regs->data[priv->dreg] = nval + priv->offset;
 }
 
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
 	[NFTA_NG_DREG]		= { .type = NLA_U32 },
 	[NFTA_NG_MODULUS]	= { .type = NLA_U32 },
 	[NFTA_NG_TYPE]		= { .type = NLA_U32 },
+	[NFTA_NG_OFFSET]	= { .type = NLA_U32 },
 };
 
 static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -52,10 +54,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 {
 	struct nft_ng_inc *priv = nft_expr_priv(expr);
 
+	if (tb[NFTA_NG_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
 	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
 	if (priv->modulus == 0)
 		return -ERANGE;
 
+	if (priv->offset + priv->modulus - 1 < priv->offset)
+		return -EOVERFLOW;
+
 	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
 	atomic_set(&priv->counter, 0);
 
@@ -64,7 +72,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 }
 
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
-		       u32 modulus, enum nft_ng_types type)
+		       u32 modulus, enum nft_ng_types type, u32 offset)
 {
 	if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
 		goto nla_put_failure;
@@ -72,6 +80,8 @@ static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
 		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset)))
+		goto nla_put_failure;
 
 	return 0;
 
@@ -83,12 +93,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_ng_inc *priv = nft_expr_priv(expr);
 
-	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL);
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL,
+			   priv->offset);
 }
 
 struct nft_ng_random {
 	enum nft_registers      dreg:8;
 	u32			modulus;
+	u32			offset;
 };
 
 static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -97,9 +109,10 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
 {
 	struct nft_ng_random *priv = nft_expr_priv(expr);
 	struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+	u32 val;
 
-	regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
-						  priv->modulus);
+	val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
+	regs->data[priv->dreg] = val + priv->offset;
 }
 
 static int nft_ng_random_init(const struct nft_ctx *ctx,
@@ -108,10 +121,16 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
 {
 	struct nft_ng_random *priv = nft_expr_priv(expr);
 
+	if (tb[NFTA_NG_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
 	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
 	if (priv->modulus == 0)
 		return -ERANGE;
 
+	if (priv->offset + priv->modulus - 1 < priv->offset)
+		return -EOVERFLOW;
+
 	prandom_init_once(&nft_numgen_prandom_state);
 
 	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
@@ -124,7 +143,8 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_ng_random *priv = nft_expr_priv(expr);
 
-	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM);
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM,
+			   priv->offset);
 }
 
 static struct nft_expr_type nft_ng_type;
-- 
cgit v1.2.3


From 6786741dbf99e44fb0c0ed85a37582b8a26f1c3b Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 6 Sep 2016 00:47:14 -0700
Subject: nsfs: add ioctl to get an owning user namespace for ns file
 descriptor

Each namespace has an owning user namespace and now there is not way
to discover these relationships.

Understending namespaces relationships allows to answer the question:
what capability does process X have to perform operations on a resource
governed by namespace Y?

After a long discussion, Eric W. Biederman proposed to use ioctl-s for
this purpose.

The NS_GET_USERNS ioctl returns a file descriptor to an owning user
namespace.
It returns EPERM if a target namespace is outside of a current user
namespace.

v2: rename parent to relative

v3: Add a missing mntput when returning -EAGAIN --EWB

Acked-by: Serge Hallyn <serge@hallyn.com>
Link: https://lkml.org/lkml/2016/7/6/158
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/nsfs.c                 | 96 ++++++++++++++++++++++++++++++++++++++++-------
 include/uapi/linux/nsfs.h | 11 ++++++
 2 files changed, 94 insertions(+), 13 deletions(-)
 create mode 100644 include/uapi/linux/nsfs.h

(limited to 'include/uapi')

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d6016e20..3887da470f7e 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
 #include <linux/magic.h>
 #include <linux/ktime.h>
 #include <linux/seq_file.h>
+#include <linux/user_namespace.h>
+#include <linux/nsfs.h>
 
 static struct vfsmount *nsfs_mnt;
 
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+			unsigned long arg);
 static const struct file_operations ns_file_operations = {
 	.llseek		= no_llseek,
+	.unlocked_ioctl = ns_ioctl,
 };
 
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-void *ns_get_path(struct path *path, struct task_struct *task,
-			const struct proc_ns_operations *ns_ops)
+static void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = mntget(nsfs_mnt);
 	struct qstr qname = { .name = "", };
 	struct dentry *dentry;
 	struct inode *inode;
-	struct ns_common *ns;
 	unsigned long d;
 
-again:
-	ns = ns_ops->get(task);
-	if (!ns) {
-		mntput(mnt);
-		return ERR_PTR(-ENOENT);
-	}
 	rcu_read_lock();
 	d = atomic_long_read(&ns->stashed);
 	if (!d)
@@ -68,7 +65,7 @@ again:
 	if (!lockref_get_not_dead(&dentry->d_lockref))
 		goto slow;
 	rcu_read_unlock();
-	ns_ops->put(ns);
+	ns->ops->put(ns);
 got_it:
 	path->mnt = mnt;
 	path->dentry = dentry;
@@ -77,7 +74,7 @@ slow:
 	rcu_read_unlock();
 	inode = new_inode_pseudo(mnt->mnt_sb);
 	if (!inode) {
-		ns_ops->put(ns);
+		ns->ops->put(ns);
 		mntput(mnt);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -95,17 +92,90 @@ slow:
 		return ERR_PTR(-ENOMEM);
 	}
 	d_instantiate(dentry, inode);
-	dentry->d_fsdata = (void *)ns_ops;
+	dentry->d_fsdata = (void *)ns->ops;
 	d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
 	if (d) {
 		d_delete(dentry);	/* make sure ->d_prune() does nothing */
 		dput(dentry);
+		mntput(mnt);
 		cpu_relax();
-		goto again;
+		return ERR_PTR(-EAGAIN);
 	}
 	goto got_it;
 }
 
+void *ns_get_path(struct path *path, struct task_struct *task,
+			const struct proc_ns_operations *ns_ops)
+{
+	struct ns_common *ns;
+	void *ret;
+
+again:
+	ns = ns_ops->get(task);
+	if (!ns)
+		return ERR_PTR(-ENOENT);
+
+	ret = __ns_get_path(path, ns);
+	if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
+		goto again;
+	return ret;
+}
+
+static int open_related_ns(struct ns_common *ns,
+		   struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+	struct path path = {};
+	struct file *f;
+	void *err;
+	int fd;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	while (1) {
+		struct ns_common *relative;
+
+		relative = get_ns(ns);
+		if (IS_ERR(relative)) {
+			put_unused_fd(fd);
+			return PTR_ERR(relative);
+		}
+
+		err = __ns_get_path(&path, relative);
+		if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
+			continue;
+		break;
+	}
+	if (IS_ERR(err)) {
+		put_unused_fd(fd);
+		return PTR_ERR(err);
+	}
+
+	f = dentry_open(&path, O_RDONLY, current_cred());
+	path_put(&path);
+	if (IS_ERR(f)) {
+		put_unused_fd(fd);
+		fd = PTR_ERR(f);
+	} else
+		fd_install(fd, f);
+
+	return fd;
+}
+
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+			unsigned long arg)
+{
+	struct ns_common *ns = get_proc_ns(file_inode(filp));
+
+	switch (ioctl) {
+	case NS_GET_USERNS:
+		return open_related_ns(ns, ns_get_owner);
+	default:
+		return -ENOTTY;
+	}
+}
+
 int ns_get_name(char *buf, size_t size, struct task_struct *task,
 			const struct proc_ns_operations *ns_ops)
 {
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..5cacd5c1b5d7
--- /dev/null
+++ b/include/uapi/linux/nsfs.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+
+#define NSIO	0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS	_IO(NSIO, 0x1)
+
+#endif /* __LINUX_NSFS_H */
-- 
cgit v1.2.3


From a7306ed8d94af729ecef8b6e37506a1c6fc14788 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 6 Sep 2016 00:47:15 -0700
Subject: nsfs: add ioctl to get a parent namespace

Pid and user namepaces are hierarchical. There is no way to discover
parent-child relationships.

In a future we will use this interface to dump and restore nested
namespaces.

Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/nsfs.c                 |  4 ++++
 include/linux/proc_ns.h   |  1 +
 include/uapi/linux/nsfs.h |  2 ++
 kernel/pid_namespace.c    | 19 +++++++++++++++++++
 kernel/user_namespace.c   |  1 +
 5 files changed, 27 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 3887da470f7e..fb7b397a1297 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -171,6 +171,10 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
 	switch (ioctl) {
 	case NS_GET_USERNS:
 		return open_related_ns(ns, ns_get_owner);
+	case NS_GET_PARENT:
+		if (!ns->ops->get_parent)
+			return -EINVAL;
+		return open_related_ns(ns, ns->ops->get_parent);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index ca85a4348ffc..12cb8bd81d2d 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -19,6 +19,7 @@ struct proc_ns_operations {
 	void (*put)(struct ns_common *ns);
 	int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
 	struct user_namespace *(*owner)(struct ns_common *ns);
+	struct ns_common *(*get_parent)(struct ns_common *ns);
 };
 
 extern const struct proc_ns_operations netns_operations;
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index 5cacd5c1b5d7..3af617230d1b 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -7,5 +7,7 @@
 
 /* Returns a file descriptor that refers to an owning user namespace */
 #define NS_GET_USERNS	_IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT	_IO(NSIO, 0x2)
 
 #endif /* __LINUX_NSFS_H */
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index c02d744225e1..4fa2d56a936c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -388,6 +388,24 @@ static int pidns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	return 0;
 }
 
+static struct ns_common *pidns_get_parent(struct ns_common *ns)
+{
+	struct pid_namespace *active = task_active_pid_ns(current);
+	struct pid_namespace *pid_ns, *p;
+
+	/* See if the parent is in the current namespace */
+	pid_ns = p = to_pid_ns(ns)->parent;
+	for (;;) {
+		if (!p)
+			return ERR_PTR(-EPERM);
+		if (p == active)
+			break;
+		p = p->parent;
+	}
+
+	return &get_pid_ns(pid_ns)->ns;
+}
+
 static struct user_namespace *pidns_owner(struct ns_common *ns)
 {
 	return to_pid_ns(ns)->user_ns;
@@ -400,6 +418,7 @@ const struct proc_ns_operations pidns_operations = {
 	.put		= pidns_put,
 	.install	= pidns_install,
 	.owner		= pidns_owner,
+	.get_parent	= pidns_get_parent,
 };
 
 static __init int pid_namespaces_init(void)
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0ef683a03c20..a58a219b99c6 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1034,6 +1034,7 @@ const struct proc_ns_operations userns_operations = {
 	.put		= userns_put,
 	.install	= userns_install,
 	.owner		= userns_owner,
+	.get_parent	= ns_get_owner,
 };
 
 static __init int user_namespaces_init(void)
-- 
cgit v1.2.3


From 8061bb54436c19fd16b7c734a69ff60bac26e3e9 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Wed, 14 Sep 2016 23:41:46 +0800
Subject: netfilter: nft_queue: add _SREG_QNUM attr to select the queue number

Currently, the user can specify the queue numbers by _QUEUE_NUM and
_QUEUE_TOTAL attributes, this is enough in most situations.

But acctually, it is not very flexible, for example:
  tcp dport 80 mapped to queue0
  tcp dport 81 mapped to queue1
  tcp dport 82 mapped to queue2
In order to do this thing, we must add 3 nft rules, and more
mapping meant more rules ...

So take one register to select the queue number, then we can add one
simple rule to mapping queues, maybe like this:
  queue num tcp dport map { 80:0, 81:1, 82:2 ... }

Florian Westphal also proposed wider usage scenarios:
  queue num jhash ip saddr . ip daddr mod ...
  queue num meta cpu ...
  queue num meta mark ...

The last point is how to load a queue number from sreg, although we can
use *(u16*)&regs->data[reg] to load the queue number, just like nat expr
to load its l4port do.

But we will cooperate with hash expr, meta cpu, meta mark expr and so on.
They all store the result to u32 type, so cast it to u16 pointer and
dereference it will generate wrong result in the big endian system.

So just keep it simple, we treat queue number as u32 type, although u16
type is already enough.

Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   2 +
 net/netfilter/nft_queue.c                | 102 +++++++++++++++++++++++++++----
 2 files changed, 92 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index bcfb892ff148..1cf41dd838b2 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -894,12 +894,14 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers)
  */
 enum nft_queue_attributes {
 	NFTA_QUEUE_UNSPEC,
 	NFTA_QUEUE_NUM,
 	NFTA_QUEUE_TOTAL,
 	NFTA_QUEUE_FLAGS,
+	NFTA_QUEUE_SREG_QNUM,
 	__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX		(__NFTA_QUEUE_MAX - 1)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index d16d59959ff6..393d359a1889 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,9 +22,10 @@
 static u32 jhash_initval __read_mostly;
 
 struct nft_queue {
-	u16	queuenum;
-	u16	queues_total;
-	u16	flags;
+	enum nft_registers	sreg_qnum:8;
+	u16			queuenum;
+	u16			queues_total;
+	u16			flags;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -54,26 +55,39 @@ static void nft_queue_eval(const struct nft_expr *expr,
 	regs->verdict.code = ret;
 }
 
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_queue *priv = nft_expr_priv(expr);
+	u32 queue, ret;
+
+	queue = regs->data[priv->sreg_qnum];
+
+	ret = NF_QUEUE_NR(queue);
+	if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+	regs->verdict.code = ret;
+}
+
 static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
 	[NFTA_QUEUE_NUM]	= { .type = NLA_U16 },
 	[NFTA_QUEUE_TOTAL]	= { .type = NLA_U16 },
 	[NFTA_QUEUE_FLAGS]	= { .type = NLA_U16 },
+	[NFTA_QUEUE_SREG_QNUM]	= { .type = NLA_U32 },
 };
 
 static int nft_queue_init(const struct nft_ctx *ctx,
-			   const struct nft_expr *expr,
-			   const struct nlattr * const tb[])
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
 {
 	struct nft_queue *priv = nft_expr_priv(expr);
 	u32 maxid;
 
-	if (tb[NFTA_QUEUE_NUM] == NULL)
-		return -EINVAL;
-
-	init_hashrandom(&jhash_initval);
 	priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
 
-	if (tb[NFTA_QUEUE_TOTAL] != NULL)
+	if (tb[NFTA_QUEUE_TOTAL])
 		priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
 	else
 		priv->queues_total = 1;
@@ -85,11 +99,34 @@ static int nft_queue_init(const struct nft_ctx *ctx,
 	if (maxid > U16_MAX)
 		return -ERANGE;
 
-	if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+	if (tb[NFTA_QUEUE_FLAGS]) {
+		priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+		if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int nft_queue_sreg_init(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr,
+			       const struct nlattr * const tb[])
+{
+	struct nft_queue *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
+	err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_QUEUE_FLAGS]) {
 		priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
 		if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
 			return -EINVAL;
+		if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)
+			return -EOPNOTSUPP;
 	}
+
 	return 0;
 }
 
@@ -108,6 +145,21 @@ nla_put_failure:
 	return -1;
 }
 
+static int
+nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_queue *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) ||
+	    nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 static struct nft_expr_type nft_queue_type;
 static const struct nft_expr_ops nft_queue_ops = {
 	.type		= &nft_queue_type,
@@ -117,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = {
 	.dump		= nft_queue_dump,
 };
 
+static const struct nft_expr_ops nft_queue_sreg_ops = {
+	.type		= &nft_queue_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+	.eval		= nft_queue_sreg_eval,
+	.init		= nft_queue_sreg_init,
+	.dump		= nft_queue_sreg_dump,
+};
+
+static const struct nft_expr_ops *
+nft_queue_select_ops(const struct nft_ctx *ctx,
+		     const struct nlattr * const tb[])
+{
+	if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM])
+		return ERR_PTR(-EINVAL);
+
+	init_hashrandom(&jhash_initval);
+
+	if (tb[NFTA_QUEUE_NUM])
+		return &nft_queue_ops;
+
+	if (tb[NFTA_QUEUE_SREG_QNUM])
+		return &nft_queue_sreg_ops;
+
+	return ERR_PTR(-EINVAL);
+}
+
 static struct nft_expr_type nft_queue_type __read_mostly = {
 	.name		= "queue",
-	.ops		= &nft_queue_ops,
+	.select_ops	= &nft_queue_select_ops,
 	.policy		= nft_queue_policy,
 	.maxattr	= NFTA_QUEUE_MAX,
 	.owner		= THIS_MODULE,
-- 
cgit v1.2.3


From fefa569a9d4bc4b7758c0fddd75bb0382c95da77 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 22 Sep 2016 08:58:55 -0700
Subject: net_sched: sch_fq: account for schedule/timers drifts

It looks like the following patch can make FQ very precise, even in VM
or stressed hosts. It matters at high pacing rates.

We take into account the difference between the time that was programmed
when last packet was sent, and current time (a drift of tens of usecs is
often observed)

Add an EWMA of the unthrottle latency to help diagnostics.

This latency is the difference between current time and oldest packet in
delayed RB-tree. This accounts for the high resolution timer latency,
but can be different under stress, as fq_check_throttled() can be
opportunistically be called from a dequeue() called after an enqueue()
for a different flow.

Tested:
// Start a 10Gbit flow
$ netperf --google-pacing-rate 1250000000 -H lpaa24 -l 10000 -- -K bbr &

Before patch :
$ sar -n DEV 10 5 | grep eth0 | grep Average
Average:         eth0  17106.04 756876.84   1102.75 1119049.02      0.00      0.00      0.52

After patch :
$ sar -n DEV 10 5 | grep eth0 | grep Average
Average:         eth0  17867.00 800245.90   1151.77 1183172.12      0.00      0.00      0.52

A new iproute2 tc can output the 'unthrottle latency' :

$ tc -s qd sh dev eth0 | grep latency
  0 gc, 0 highprio, 32490767 throttled, 2382 ns latency

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  2 +-
 net/sched/sch_fq.c             | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index f8e39dbaa781..df7451d35131 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -811,7 +811,7 @@ struct tc_fq_qd_stats {
 	__u32	flows;
 	__u32	inactive_flows;
 	__u32	throttled_flows;
-	__u32	pad;
+	__u32	unthrottle_latency_ns;
 };
 
 /* Heavy-Hitter Filter */
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 5dd929cc1423..18e752439f6f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -86,6 +86,7 @@ struct fq_sched_data {
 
 	struct rb_root	delayed;	/* for rate limited flows */
 	u64		time_next_delayed_flow;
+	unsigned long	unthrottle_latency_ns;
 
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
@@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+	unsigned long sample;
 	struct rb_node *p;
 
 	if (q->time_next_delayed_flow > now)
 		return;
 
+	/* Update unthrottle latency EWMA.
+	 * This is cheap and can help diagnosing timer/latency problems.
+	 */
+	sample = (unsigned long)(now - q->time_next_delayed_flow);
+	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+	q->unthrottle_latency_ns += sample >> 3;
+
 	q->time_next_delayed_flow = ~0ULL;
 	while ((p = rb_first(&q->delayed)) != NULL) {
 		struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
@@ -515,7 +524,12 @@ begin:
 			len = NSEC_PER_SEC;
 			q->stat_pkts_too_long++;
 		}
-
+		/* Account for schedule/timers drifts.
+		 * f->time_next_packet was set when prior packet was sent,
+		 * and current time (@now) can be too late by tens of us.
+		 */
+		if (f->time_next_packet)
+			len -= min(len/2, now - f->time_next_packet);
 		f->time_next_packet = now + len;
 	}
 out:
@@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
 	q->flow_max_rate	= ~0U;
+	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
 	q->old_flows.first	= NULL;
@@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.flows		  = q->flows;
 	st.inactive_flows	  = q->inactive_flows;
 	st.throttled_flows	  = q->throttled_flows;
-	st.pad			  = 0;
-
+	st.unthrottle_latency_ns  = min_t(unsigned long,
+					  q->unthrottle_latency_ns, ~0U);
 	sch_tree_unlock(sch);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
cgit v1.2.3


From 7a4b28c6cc9ffac50f791b99cc7e46106436e5d8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 23 Sep 2016 01:28:37 +0200
Subject: bpf: add helper to invalidate hash

Add a small helper that complements 36bbef52c7eb ("bpf: direct packet
write and access for helpers for clsact progs") for invalidating the
current skb->hash after mangling on headers via direct packet write.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |  7 +++++++
 net/core/filter.c        | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e07432b9f8b8..f09c70b97eca 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -419,6 +419,13 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_csum_update,
 
+	/**
+	 * bpf_set_hash_invalid(skb)
+	 * Invalidate current skb>hash.
+	 * @skb: pointer to skb
+	 */
+	BPF_FUNC_set_hash_invalid,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index acf84fbfb043..00351cdf7d0c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1777,6 +1777,22 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
+{
+	/* After all direct packet write, this can be used once for
+	 * triggering a lazy recalc on next skb_get_hash() invocation.
+	 */
+	skb_clear_hash(skb);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
+	.func		= bpf_set_hash_invalid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
 	   u16, vlan_tci)
 {
@@ -2534,6 +2550,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_route_realm_proto;
 	case BPF_FUNC_get_hash_recalc:
 		return &bpf_get_hash_recalc_proto;
+	case BPF_FUNC_set_hash_invalid:
+		return &bpf_set_hash_invalid_proto;
 	case BPF_FUNC_perf_event_output:
 		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
-- 
cgit v1.2.3


From 79aab093a0b5370d7fc4e99df75996f4744dc03f Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 22 Sep 2016 12:11:15 +0300
Subject: net: Update API for VF vlan protocol 802.1ad support

Introduce new rtnl UAPI that exposes a list of vlans per VF, giving
the ability for user-space application to specify it for the VF, as an
option to support 802.1ad.
We adjusted IP Link tool to support this option.

For future use cases, the new UAPI supports multiple vlans. For now we
limit the list size to a single vlan in kernel.
Add IFLA_VF_VLAN_LIST in addition to IFLA_VF_VLAN to keep backward
compatibility with older versions of IP Link tool.

Add a vlan protocol parameter to the ndo_set_vf_vlan callback.
We kept 802.1Q as the drivers' default vlan protocol.
Suitable ip link tool command examples:
  Set vf vlan protocol 802.1ad:
    ip link set eth0 vf 1 vlan 100 proto 802.1ad
  Set vf to VST (802.1Q) mode:
    ip link set eth0 vf 1 vlan 100 proto 802.1Q
  Or by omitting the new parameter
    ip link set eth0 vf 1 vlan 100

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |  3 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c  |  9 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c    |  6 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h    |  2 +-
 drivers/net/ethernet/emulex/benet/be_main.c        |  6 +-
 drivers/net/ethernet/intel/fm10k/fm10k.h           |  2 +-
 drivers/net/ethernet/intel/fm10k/fm10k_iov.c       |  6 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  4 +-
 drivers/net/ethernet/intel/igb/igb_main.c          |  9 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c     |  5 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h     |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  6 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c       |  6 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h  |  2 +-
 .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c   |  5 +-
 drivers/net/ethernet/sfc/sriov.c                   |  5 +-
 drivers/net/ethernet/sfc/sriov.h                   |  2 +-
 include/linux/if_link.h                            |  1 +
 include/linux/netdevice.h                          |  6 +-
 include/uapi/linux/if_link.h                       | 19 ++++-
 net/core/rtnetlink.c                               | 80 ++++++++++++++++++----
 23 files changed, 161 insertions(+), 42 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 0e68fadecfdb..243cb9748d35 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
 			struct ifla_vf_info *ivi);
 int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
-int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
+int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+		      __be16 vlan_proto);
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 6c586b045d1d..3f77d0863543 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2521,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp)
 	for_each_vf(bp, vfidx) {
 		bulletin = BP_VF_BULLETIN(bp, vfidx);
 		if (bulletin->valid_bitmap & (1 << VLAN_VALID))
-			bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0);
+			bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0,
+					  htons(ETH_P_8021Q));
 	}
 }
 
@@ -2781,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf,
 	return 0;
 }
 
-int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos)
+int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos,
+		      __be16 vlan_proto)
 {
 	struct pf_vf_bulletin_content *bulletin = NULL;
 	struct bnx2x *bp = netdev_priv(dev);
@@ -2796,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos)
 		return -EINVAL;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n",
 	   vfidx, vlan, 0);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 8be718508600..ec6cd18842c3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -174,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
-int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos)
+int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto)
 {
 	struct hwrm_func_cfg_input req = {0};
 	struct bnxt *bp = netdev_priv(dev);
@@ -185,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos)
 	if (bp->hwrm_spec_code < 0x10201)
 		return -ENOTSUPP;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	rc = bnxt_vf_ndo_prep(bp, vf_id);
 	if (rc)
 		return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 0392670ab49c..1ab72e4820af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -12,7 +12,7 @@
 
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
-int bnxt_set_vf_vlan(struct net_device *, int, u16, u8);
+int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int bnxt_set_vf_bw(struct net_device *, int, int, int);
 int bnxt_set_vf_link_state(struct net_device *, int, int);
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 9a94840c5757..ac513e6627d1 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1895,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
 	return 0;
 }
 
-static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+			  __be16 vlan_proto)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
@@ -1907,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	if (vlan || qos) {
 		vlan |= qos << VLAN_PRIO_SHIFT;
 		status = be_set_vf_tvt(adapter, vf, vlan);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 67ff01aeb11a..4d19e46f7c55 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -507,7 +507,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs);
 s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
-			  int vf_idx, u16 vid, u8 qos);
+			  int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
 int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
 			int unused);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index d9dec81f6b6d..5f4dac0d36ef 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac)
 }
 
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
-			  u8 qos)
+			  u8 qos, __be16 vlan_proto)
 {
 	struct fm10k_intfc *interface = netdev_priv(netdev);
 	struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -460,6 +460,10 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
 	if (qos || (vid > (VLAN_VID_MASK - 1)))
 		return -EINVAL;
 
+	/* VF VLAN Protocol part to default is unsupported */
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	vf_info = &iov_data->vf_info[vf_idx];
 
 	/* exit if there is nothing to do */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index da3423561b3a..724d8740d4cc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2747,11 +2747,12 @@ error_param:
  * @vf_id: VF identifier
  * @vlan_id: mac address
  * @qos: priority setting
+ * @vlan_proto: vlan protocol
  *
  * program VF vlan id and/or qos
  **/
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-			      int vf_id, u16 vlan_id, u8 qos)
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto)
 {
 	u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -2774,6 +2775,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
 		goto error_pvid;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q)) {
+		dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+		ret = -EPROTONOSUPPORT;
+		goto error_pvid;
+	}
+
 	vf = &(pf->vf[vf_id]);
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 875174141451..4012d069939a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
 
 /* VF configuration related iplink handlers */
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-			      int vf_id, u16 vlan_id, u8 qos);
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto);
 int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 		       int max_tx_rate);
 int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index af75eac5fa16..a83aa13a5bf4 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -169,7 +169,7 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-			       int vf, u16 vlan, u8 qos);
+			       int vf, u16 vlan, u8 qos, __be16 vlan_proto);
 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
@@ -6222,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf)
 	return 0;
 }
 
-static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-			       int vf, u16 vlan, u8 qos)
+static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
+			       u16 vlan, u8 qos, __be16 vlan_proto)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 
 	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
 			       igb_disable_port_vlan(adapter, vf);
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 8618599dfd6f..b18590a995db 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1354,13 +1354,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
 	return err;
 }
 
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+			  u8 qos, __be16 vlan_proto)
 {
 	int err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
 	if (vlan || qos) {
 		/* Check if there is already a port VLAN set, if so
 		 * we have to delete the old one first before we
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 47e65e2f886a..0c7977d27b71 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
-			   u8 qos);
+			   u8 qos, __be16 vlan_proto);
 int ixgbe_link_mbps(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
 			int max_tx_rate);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a94f8a3f026c..132eeeafcdc4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2400,11 +2400,15 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac)
 	return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
 }
 
-static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			       __be16 vlan_proto)
 {
 	struct mlx4_en_priv *en_priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = en_priv->mdev;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c12792314be7..b58cfe37dead 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2917,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
 	return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
 }
 
-static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			     __be16 vlan_proto)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
 					   vlan, qos);
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index cd23a2946db7..0e198fe89d1a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev,
 static void qede_link_update(void *dev, struct qed_link_output *link);
 
 #ifdef CONFIG_QED_SRIOV
-static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos)
+static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
+			    __be16 vlan_proto)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
@@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos)
 		return -EINVAL;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n",
 		   vlan, vf);
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 24061b9b92e8..5f327659efa7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *);
 int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
 			       struct ifla_vf_info *);
-int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);
+int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool);
 #else
 static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index afd687e5e779..50eaafa3eaba 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf,
 }
 
 int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf,
-			     u16 vlan, u8 qos)
+			     u16 vlan, u8 qos, __be16 vlan_proto)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf,
 	if (vf >= sriov->num_vfs || qos > 7)
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	if (vlan > MAX_VLAN_ID) {
 		netdev_err(netdev,
 			   "Invalid VLAN ID, allowed range is [0 - %d]\n",
diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c
index 816c44689e67..9abcf4aded30 100644
--- a/drivers/net/ethernet/sfc/sriov.c
+++ b/drivers/net/ethernet/sfc/sriov.c
@@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
 }
 
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-			  u8 qos)
+			  u8 qos, __be16 vlan_proto)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
 		    (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)))
 			return -EINVAL;
 
+		if (vlan_proto != htons(ETH_P_8021Q))
+			return -EPROTONOSUPPORT;
+
 		return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos);
 	} else {
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h
index 400df526586d..ba1762e7f216 100644
--- a/drivers/net/ethernet/sfc/sriov.h
+++ b/drivers/net/ethernet/sfc/sriov.h
@@ -16,7 +16,7 @@
 
 int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac);
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-			  u8 qos);
+			  u8 qos, __be16 vlan_proto);
 int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
 			      bool spoofchk);
 int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f923d15b432c..0b17c585b5cd 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -25,5 +25,6 @@ struct ifla_vf_info {
 	__u32 max_tx_rate;
 	__u32 rss_query_en;
 	__u32 trusted;
+	__be16 vlan_proto;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69f242c71865..1e8a5c734d72 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -946,7 +946,8 @@ struct netdev_xdp {
  *
  *	SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
- * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
+ *			  u8 qos, __be16 proto);
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
@@ -1187,7 +1188,8 @@ struct net_device_ops {
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
 	int			(*ndo_set_vf_vlan)(struct net_device *dev,
-						   int queue, u16 vlan, u8 qos);
+						   int queue, u16 vlan,
+						   u8 qos, __be16 proto);
 	int			(*ndo_set_vf_rate)(struct net_device *dev,
 						   int vf, int min_tx_rate,
 						   int max_tx_rate);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7ec9e99d5491..b4fba662cd32 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -619,7 +619,7 @@ enum {
 enum {
 	IFLA_VF_UNSPEC,
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
-	IFLA_VF_VLAN,
+	IFLA_VF_VLAN,		/* VLAN ID and QoS */
 	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
@@ -631,6 +631,7 @@ enum {
 	IFLA_VF_TRUST,		/* Trust VF */
 	IFLA_VF_IB_NODE_GUID,	/* VF Infiniband node GUID */
 	IFLA_VF_IB_PORT_GUID,	/* VF Infiniband port GUID */
+	IFLA_VF_VLAN_LIST,	/* nested list of vlans, option for QinQ */
 	__IFLA_VF_MAX,
 };
 
@@ -647,6 +648,22 @@ struct ifla_vf_vlan {
 	__u32 qos;
 };
 
+enum {
+	IFLA_VF_VLAN_INFO_UNSPEC,
+	IFLA_VF_VLAN_INFO,	/* VLAN ID, QoS and VLAN protocol */
+	__IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+	__u32 vf;
+	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+	__u32 qos;
+	__be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
 struct ifla_vf_tx_rate {
 	__u32 vf;
 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0dbae4244a89..3ac8946bf244 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -843,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 		size += nla_total_size(num_vfs * sizeof(struct nlattr));
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
-			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(MAX_VLAN_LIST_LEN *
+					sizeof(struct nlattr)) +
+			 nla_total_size(MAX_VLAN_LIST_LEN *
+					sizeof(struct ifla_vf_vlan_info)) +
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
 			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
@@ -1111,14 +1114,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 					       struct nlattr *vfinfo)
 {
 	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct nlattr *vf, *vfstats, *vfvlanlist;
 	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_vlan_info vf_vlan_info;
 	struct ifla_vf_spoofchk vf_spoofchk;
 	struct ifla_vf_tx_rate vf_tx_rate;
 	struct ifla_vf_stats vf_stats;
 	struct ifla_vf_trust vf_trust;
 	struct ifla_vf_vlan vf_vlan;
 	struct ifla_vf_rate vf_rate;
-	struct nlattr *vf, *vfstats;
 	struct ifla_vf_mac vf_mac;
 	struct ifla_vf_info ivi;
 
@@ -1135,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	 * IFLA_VF_LINK_STATE_AUTO which equals zero
 	 */
 	ivi.linkstate = 0;
+	/* VLAN Protocol by default is 802.1Q */
+	ivi.vlan_proto = htons(ETH_P_8021Q);
 	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
 		return 0;
 
 	vf_mac.vf =
 		vf_vlan.vf =
+		vf_vlan_info.vf =
 		vf_rate.vf =
 		vf_tx_rate.vf =
 		vf_spoofchk.vf =
@@ -1150,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 	vf_vlan.vlan = ivi.vlan;
 	vf_vlan.qos = ivi.qos;
+	vf_vlan_info.vlan = ivi.vlan;
+	vf_vlan_info.qos = ivi.qos;
+	vf_vlan_info.vlan_proto = ivi.vlan_proto;
 	vf_tx_rate.rate = ivi.max_tx_rate;
 	vf_rate.min_tx_rate = ivi.min_tx_rate;
 	vf_rate.max_tx_rate = ivi.max_tx_rate;
@@ -1158,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	vf_rss_query_en.setting = ivi.rss_query_en;
 	vf_trust.setting = ivi.trusted;
 	vf = nla_nest_start(skb, IFLA_VF_INFO);
-	if (!vf) {
-		nla_nest_cancel(skb, vfinfo);
-		return -EMSGSIZE;
-	}
+	if (!vf)
+		goto nla_put_vfinfo_failure;
 	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
 	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
 	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
@@ -1177,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 		    &vf_rss_query_en) ||
 	    nla_put(skb, IFLA_VF_TRUST,
 		    sizeof(vf_trust), &vf_trust))
-		return -EMSGSIZE;
+		goto nla_put_vf_failure;
+	vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
+	if (!vfvlanlist)
+		goto nla_put_vf_failure;
+	if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
+		    &vf_vlan_info)) {
+		nla_nest_cancel(skb, vfvlanlist);
+		goto nla_put_vf_failure;
+	}
+	nla_nest_end(skb, vfvlanlist);
 	memset(&vf_stats, 0, sizeof(vf_stats));
 	if (dev->netdev_ops->ndo_get_vf_stats)
 		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
 						&vf_stats);
 	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-	if (!vfstats) {
-		nla_nest_cancel(skb, vf);
-		nla_nest_cancel(skb, vfinfo);
-		return -EMSGSIZE;
-	}
+	if (!vfstats)
+		goto nla_put_vf_failure;
 	if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
 			      vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
@@ -1199,11 +1213,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
 			      vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
-			      vf_stats.multicast, IFLA_VF_STATS_PAD))
-		return -EMSGSIZE;
+			      vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+		nla_nest_cancel(skb, vfstats);
+		goto nla_put_vf_failure;
+	}
 	nla_nest_end(skb, vfstats);
 	nla_nest_end(skb, vf);
 	return 0;
+
+nla_put_vf_failure:
+	nla_nest_cancel(skb, vf);
+nla_put_vfinfo_failure:
+	nla_nest_cancel(skb, vfinfo);
+	return -EMSGSIZE;
 }
 
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
@@ -1448,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
 	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_VLAN_LIST]     = { .type = NLA_NESTED },
 	[IFLA_VF_TX_RATE]	= { .len = sizeof(struct ifla_vf_tx_rate) },
 	[IFLA_VF_SPOOFCHK]	= { .len = sizeof(struct ifla_vf_spoofchk) },
 	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
@@ -1704,7 +1727,34 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_vlan)
 			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
-						   ivv->qos);
+						   ivv->qos,
+						   htons(ETH_P_8021Q));
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_VLAN_LIST]) {
+		struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
+		struct nlattr *attr;
+		int rem, len = 0;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_vlan)
+			return err;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
+			    nla_len(attr) < NLA_HDRLEN) {
+				return -EINVAL;
+			}
+			if (len >= MAX_VLAN_LIST_LEN)
+				return -EOPNOTSUPP;
+			ivvl[len] = nla_data(attr);
+
+			len++;
+		}
+		err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
+					   ivvl[0]->qos, ivvl[0]->vlan_proto);
 		if (err < 0)
 			return err;
 	}
-- 
cgit v1.2.3


From 11d5f15723c9f39d7c131d0149d024c17dbef676 Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Thu, 22 Sep 2016 12:43:44 -0400
Subject: netfilter: xt_hashlimit: Create revision 2 to support higher pps
 rates

Create a new revision for the hashlimit iptables extension module. Rev 2
will support higher pps of upto 1 million, Version 1 supports only 10k.

To support this we have to increase the size of the variables avg and
burst in hashlimit_cfg to 64-bit. Create two new structs hashlimit_cfg2
and xt_hashlimit_mtinfo2 and also create newer versions of all the
functions for match, checkentry and destroy.

Some of the functions like hashlimit_mt, hashlimit_mt_check etc are very
similar in both rev1 and rev2 with only minor changes, so I have split
those functions and moved all the common code to a *_common function.

Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_hashlimit.h |  23 ++
 net/netfilter/xt_hashlimit.c                | 330 ++++++++++++++++++++++------
 2 files changed, 285 insertions(+), 68 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 6db90372f09c..3efc0ca18345 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -6,6 +6,7 @@
 
 /* timings are in milliseconds. */
 #define XT_HASHLIMIT_SCALE 10000
+#define XT_HASHLIMIT_SCALE_v2 1000000llu
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
  * seconds, or one packet every 59 hours.
  */
@@ -63,6 +64,20 @@ struct hashlimit_cfg1 {
 	__u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg2 {
+	__u64 avg;		/* Average secs between packets * scale */
+	__u64 burst;		/* Period multiplier for upper limit. */
+	__u32 mode;		/* bitmask of XT_HASHLIMIT_HASH_* */
+
+	/* user specified */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;		/* when do entries expire? */
+
+	__u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
 	char name[IFNAMSIZ];
 	struct hashlimit_cfg1 cfg;
@@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 {
 	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo2 {
+	char name[NAME_MAX];
+	struct hashlimit_cfg2 cfg;
+
+	/* Used internally by the kernel */
+	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_HASHLIMIT_H */
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index e93d9e0a3f35..44a095ecc7b7 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -57,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
 
 /* need to declare this at the top */
 static const struct file_operations dl_file_ops_v1;
+static const struct file_operations dl_file_ops;
 
 /* hash table crap */
 struct dsthash_dst {
@@ -86,8 +87,8 @@ struct dsthash_ent {
 	unsigned long expires;		/* precalculated expiry time */
 	struct {
 		unsigned long prev;	/* last modification */
-		u_int32_t credit;
-		u_int32_t credit_cap, cost;
+		u_int64_t credit;
+		u_int64_t credit_cap, cost;
 	} rateinfo;
 	struct rcu_head rcu;
 };
@@ -98,7 +99,7 @@ struct xt_hashlimit_htable {
 	u_int8_t family;
 	bool rnd_initialized;
 
-	struct hashlimit_cfg1 cfg;	/* config */
+	struct hashlimit_cfg2 cfg;	/* config */
 
 	/* used internally */
 	spinlock_t lock;		/* lock for list_head */
@@ -114,6 +115,30 @@ struct xt_hashlimit_htable {
 	struct hlist_head hash[0];	/* hashtable itself */
 };
 
+static int
+cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+{
+	if (revision == 1) {
+		struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+
+		to->mode = cfg->mode;
+		to->avg = cfg->avg;
+		to->burst = cfg->burst;
+		to->size = cfg->size;
+		to->max = cfg->max;
+		to->gc_interval = cfg->gc_interval;
+		to->expire = cfg->expire;
+		to->srcmask = cfg->srcmask;
+		to->dstmask = cfg->dstmask;
+	} else if (revision == 2) {
+		memcpy(to, from, sizeof(struct hashlimit_cfg2));
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static DEFINE_MUTEX(hashlimit_mutex);	/* protects htables list */
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
@@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(struct work_struct *work);
 
-static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
-			    u_int8_t family)
+static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+			 const char *name, u_int8_t family,
+			 struct xt_hashlimit_htable **out_hinfo,
+			 int revision)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
-	unsigned int size;
-	unsigned int i;
+	unsigned int size, i;
+	int ret;
 
-	if (minfo->cfg.size) {
-		size = minfo->cfg.size;
+	if (cfg->size) {
+		size = cfg->size;
 	} else {
 		size = (totalram_pages << PAGE_SHIFT) / 16384 /
 		       sizeof(struct list_head);
@@ -238,10 +265,14 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	                sizeof(struct list_head) * size);
 	if (hinfo == NULL)
 		return -ENOMEM;
-	minfo->hinfo = hinfo;
+	*out_hinfo = hinfo;
 
 	/* copy match config into hashtable config */
-	memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+
+	if (ret)
+		return ret;
+
 	hinfo->cfg.size = size;
 	if (hinfo->cfg.max == 0)
 		hinfo->cfg.max = 8 * hinfo->cfg.size;
@@ -255,17 +286,18 @@ static int htable_create_v1(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	hinfo->count = 0;
 	hinfo->family = family;
 	hinfo->rnd_initialized = false;
-	hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+	hinfo->name = kstrdup(name, GFP_KERNEL);
 	if (!hinfo->name) {
 		vfree(hinfo);
 		return -ENOMEM;
 	}
 	spin_lock_init(&hinfo->lock);
 
-	hinfo->pde = proc_create_data(minfo->name, 0,
+	hinfo->pde = proc_create_data(name, 0,
 		(family == NFPROTO_IPV4) ?
 		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		&dl_file_ops_v1, hinfo);
+		(revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
+		hinfo);
 	if (hinfo->pde == NULL) {
 		kfree(hinfo->name);
 		vfree(hinfo);
@@ -399,6 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
    CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
 */
 #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
 
 /* Repeated shift and or gives us all 1s, final shift and add 1 gives
  * us the power of 2 below the theoretical max, so GCC simply does a
@@ -408,8 +441,11 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
 #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
 #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
 #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32))
 #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1)
 
+#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ)
 #define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1)
 
 /* in byte mode, the lowest possible rate is one packet/second.
@@ -425,15 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len)
 }
 
 /* Precision saver. */
-static u32 user2credits(u32 user)
+static u64 user2credits(u64 user, int revision)
 {
-	/* If multiplying would overflow... */
-	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
-		/* Divide first. */
-		return (user / XT_HASHLIMIT_SCALE) *\
-					HZ * CREDITS_PER_JIFFY_v1;
+	if (revision == 1) {
+		/* If multiplying would overflow... */
+		if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
+			/* Divide first. */
+			return (user / XT_HASHLIMIT_SCALE) *\
+						HZ * CREDITS_PER_JIFFY_v1;
+
+		return (user * HZ * CREDITS_PER_JIFFY_v1) \
+						/ XT_HASHLIMIT_SCALE;
+	} else {
+		if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+			return (user / XT_HASHLIMIT_SCALE_v2) *\
+						HZ * CREDITS_PER_JIFFY;
 
-	return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE;
+		return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2;
+	}
 }
 
 static u32 user2credits_byte(u32 user)
@@ -443,10 +488,11 @@ static u32 user2credits_byte(u32 user)
 	return (u32) (us >> 32);
 }
 
-static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
+			    u32 mode, int revision)
 {
 	unsigned long delta = now - dh->rateinfo.prev;
-	u32 cap;
+	u64 cap, cpj;
 
 	if (delta == 0)
 		return;
@@ -454,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
 	dh->rateinfo.prev = now;
 
 	if (mode & XT_HASHLIMIT_BYTES) {
-		u32 tmp = dh->rateinfo.credit;
+		u64 tmp = dh->rateinfo.credit;
 		dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
 		cap = CREDITS_PER_JIFFY_BYTES * HZ;
 		if (tmp >= dh->rateinfo.credit) {/* overflow */
@@ -462,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
 			return;
 		}
 	} else {
-		dh->rateinfo.credit += delta * CREDITS_PER_JIFFY_v1;
+		cpj = (revision == 1) ?
+			CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+		dh->rateinfo.credit += delta * cpj;
 		cap = dh->rateinfo.credit_cap;
 	}
 	if (dh->rateinfo.credit > cap)
@@ -470,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
 }
 
 static void rateinfo_init(struct dsthash_ent *dh,
-			  struct xt_hashlimit_htable *hinfo)
+			  struct xt_hashlimit_htable *hinfo, int revision)
 {
 	dh->rateinfo.prev = jiffies;
 	if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
@@ -479,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh,
 		dh->rateinfo.credit_cap = hinfo->cfg.burst;
 	} else {
 		dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
-						   hinfo->cfg.burst);
-		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+						   hinfo->cfg.burst, revision);
+		dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision);
 		dh->rateinfo.credit_cap = dh->rateinfo.credit;
 	}
 }
@@ -604,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 }
 
 static bool
-hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
+		    struct xt_hashlimit_htable *hinfo,
+		    const struct hashlimit_cfg2 *cfg, int revision)
 {
-	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
-	struct xt_hashlimit_htable *hinfo = info->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 	bool race = false;
-	u32 cost;
+	u64 cost;
 
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
@@ -627,18 +675,18 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 		} else if (race) {
 			/* Already got an entry, update expiration timeout */
 			dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-			rateinfo_recalc(dh, now, hinfo->cfg.mode);
+			rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 		} else {
 			dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-			rateinfo_init(dh, hinfo);
+			rateinfo_init(dh, hinfo, revision);
 		}
 	} else {
 		/* update expiration timeout */
 		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-		rateinfo_recalc(dh, now, hinfo->cfg.mode);
+		rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 	}
 
-	if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+	if (cfg->mode & XT_HASHLIMIT_BYTES)
 		cost = hashlimit_byte_cost(skb->len, dh);
 	else
 		cost = dh->rateinfo.cost;
@@ -648,70 +696,126 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 		dh->rateinfo.credit -= cost;
 		spin_unlock(&dh->lock);
 		rcu_read_unlock_bh();
-		return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
+		return !(cfg->mode & XT_HASHLIMIT_INVERT);
 	}
 
 	spin_unlock(&dh->lock);
 	rcu_read_unlock_bh();
 	/* default match is underlimit - so over the limit, we need to invert */
-	return info->cfg.mode & XT_HASHLIMIT_INVERT;
+	return cfg->mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
 	par->hotdrop = true;
 	return false;
 }
 
-static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+static bool
+hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
+	struct hashlimit_cfg2 cfg = {};
+	int ret;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_common(skb, par, hinfo, &cfg, 1);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
+
+	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+}
+
+static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
+				     struct xt_hashlimit_htable **hinfo,
+				     struct hashlimit_cfg2 *cfg,
+				     const char *name, int revision)
 {
 	struct net *net = par->net;
-	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	int ret;
 
-	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
-		return -EINVAL;
-	if (info->name[sizeof(info->name)-1] != '\0')
+	if (cfg->gc_interval == 0 || cfg->expire == 0)
 		return -EINVAL;
 	if (par->family == NFPROTO_IPV4) {
-		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
+		if (cfg->srcmask > 32 || cfg->dstmask > 32)
 			return -EINVAL;
 	} else {
-		if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
+		if (cfg->srcmask > 128 || cfg->dstmask > 128)
 			return -EINVAL;
 	}
 
-	if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
+	if (cfg->mode & ~XT_HASHLIMIT_ALL) {
 		pr_info("Unknown mode mask %X, kernel too old?\n",
-						info->cfg.mode);
+						cfg->mode);
 		return -EINVAL;
 	}
 
 	/* Check for overflow. */
-	if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
-		if (user2credits_byte(info->cfg.avg) == 0) {
-			pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+	if (cfg->mode & XT_HASHLIMIT_BYTES) {
+		if (user2credits_byte(cfg->avg) == 0) {
+			pr_info("overflow, rate too high: %llu\n", cfg->avg);
 			return -EINVAL;
 		}
-	} else if (info->cfg.burst == 0 ||
-		    user2credits(info->cfg.avg * info->cfg.burst) <
-		    user2credits(info->cfg.avg)) {
-			pr_info("overflow, try lower: %u/%u\n",
-				info->cfg.avg, info->cfg.burst);
+	} else if (cfg->burst == 0 ||
+		    user2credits(cfg->avg * cfg->burst, revision) <
+		    user2credits(cfg->avg, revision)) {
+			pr_info("overflow, try lower: %llu/%llu\n",
+				cfg->avg, cfg->burst);
 			return -ERANGE;
 	}
 
 	mutex_lock(&hashlimit_mutex);
-	info->hinfo = htable_find_get(net, info->name, par->family);
-	if (info->hinfo == NULL) {
-		ret = htable_create_v1(net, info, par->family);
+	*hinfo = htable_find_get(net, name, par->family);
+	if (*hinfo == NULL) {
+		ret = htable_create(net, cfg, name, par->family,
+				    hinfo, revision);
 		if (ret < 0) {
 			mutex_unlock(&hashlimit_mutex);
 			return ret;
 		}
 	}
 	mutex_unlock(&hashlimit_mutex);
+
 	return 0;
 }
 
+static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	struct hashlimit_cfg2 cfg = {};
+	int ret;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_check_common(par, &info->hinfo,
+					 &cfg, info->name, 1);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
+					 info->name, 2);
+}
+
 static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
@@ -719,6 +823,13 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
 	htable_put(info->hinfo);
 }
 
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+	htable_put(info->hinfo);
+}
+
 static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 	{
 		.name           = "hashlimit",
@@ -730,6 +841,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.destroy        = hashlimit_mt_destroy_v1,
 		.me             = THIS_MODULE,
 	},
+	{
+		.name           = "hashlimit",
+		.revision       = 2,
+		.family         = NFPROTO_IPV4,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
+		.checkentry     = hashlimit_mt_check,
+		.destroy        = hashlimit_mt_destroy,
+		.me             = THIS_MODULE,
+	},
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	{
 		.name           = "hashlimit",
@@ -741,6 +862,16 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.destroy        = hashlimit_mt_destroy_v1,
 		.me             = THIS_MODULE,
 	},
+	{
+		.name           = "hashlimit",
+		.revision       = 2,
+		.family         = NFPROTO_IPV6,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
+		.checkentry     = hashlimit_mt_check,
+		.destroy        = hashlimit_mt_destroy,
+		.me             = THIS_MODULE,
+	},
 #endif
 };
 
@@ -787,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v)
 	spin_unlock_bh(&htable->lock);
 }
 
-static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
-			       struct seq_file *s)
+static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
+			 struct seq_file *s)
 {
-	const struct xt_hashlimit_htable *ht = s->private;
-
-	spin_lock(&ent->lock);
-	/* recalculate to show accurate numbers */
-	rateinfo_recalc(ent, jiffies, ht->cfg.mode);
-
 	switch (family) {
 	case NFPROTO_IPV4:
-		seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
+		seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n",
 			   (long)(ent->expires - jiffies)/HZ,
 			   &ent->dst.ip.src,
 			   ntohs(ent->dst.src_port),
@@ -809,7 +934,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 		break;
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	case NFPROTO_IPV6:
-		seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
+		seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n",
 			   (long)(ent->expires - jiffies)/HZ,
 			   &ent->dst.ip6.src,
 			   ntohs(ent->dst.src_port),
@@ -822,6 +947,34 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 	default:
 		BUG();
 	}
+}
+
+static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
+			       struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1);
+
+	dl_seq_print(ent, family, s);
+
+	spin_unlock(&ent->lock);
+	return seq_has_overflowed(s);
+}
+
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+			    struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+	dl_seq_print(ent, family, s);
+
 	spin_unlock(&ent->lock);
 	return seq_has_overflowed(s);
 }
@@ -840,6 +993,20 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
 	return 0;
 }
 
+static int dl_seq_show(struct seq_file *s, void *v)
+{
+	struct xt_hashlimit_htable *htable = s->private;
+	unsigned int *bucket = (unsigned int *)v;
+	struct dsthash_ent *ent;
+
+	if (!hlist_empty(&htable->hash[*bucket])) {
+		hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+			if (dl_seq_real_show(ent, htable->family, s))
+				return -1;
+	}
+	return 0;
+}
+
 static const struct seq_operations dl_seq_ops_v1 = {
 	.start = dl_seq_start,
 	.next  = dl_seq_next,
@@ -847,6 +1014,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
 	.show  = dl_seq_show_v1
 };
 
+static const struct seq_operations dl_seq_ops = {
+	.start = dl_seq_start,
+	.next  = dl_seq_next,
+	.stop  = dl_seq_stop,
+	.show  = dl_seq_show
+};
+
 static int dl_proc_open_v1(struct inode *inode, struct file *file)
 {
 	int ret = seq_open(file, &dl_seq_ops_v1);
@@ -858,6 +1032,18 @@ static int dl_proc_open_v1(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static int dl_proc_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &dl_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+
+		sf->private = PDE_DATA(inode);
+	}
+	return ret;
+}
+
 static const struct file_operations dl_file_ops_v1 = {
 	.owner   = THIS_MODULE,
 	.open    = dl_proc_open_v1,
@@ -866,6 +1052,14 @@ static const struct file_operations dl_file_ops_v1 = {
 	.release = seq_release
 };
 
+static const struct file_operations dl_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = dl_proc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
 static int __net_init hashlimit_proc_net_init(struct net *net)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
-- 
cgit v1.2.3


From 318824d3d0795309b448563faa698d3c02035db4 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sat, 24 Sep 2016 19:28:23 +0900
Subject: ALSA: control: cage TLV_DB_RANGE_HEAD in kernel land because it was
 obsoleted

In commit bf1d1c9b6179 ("ALSA: tlv: add DECLARE_TLV_DB_RANGE()"), the new
macro was added so that "dB range information can be specified without
having to count the items manually for TLV_DB_RANGE_HEAD()". In short,
TLV_DB_RANGE_HEAD macro was obsoleted.

In commit 46e860f76804 ("ALSA: rename TLV-related macros so that they're
friendly to user applications"), TLV-related macros are exposed for
applications in user land to get content of data structured by
Type/Length/Value shape. The commit managed to expose TLV-related macros
as many as possible, while obsoleted TLV_DB_RANGE_HEAD() was included to
the list of exposed macros.

This situation brings some confusions to application developers because
they might think all exposed macros have their own purpose and useful for
applications.

For the reason, this commit moves TLV_DB_RANGE_HEAD macro from UAPI header
to a header for kernel land, again. The above commit is done within the
same development period for kernel 4.9, thus not published yet. This
commit might certainly brings no confusions to user land.

Reference: commit bf1d1c9b6179 ("ALSA: tlv: add DECLARE_TLV_DB_RANGE()")
Reference: commit 46e860f76804 ("ALSA: rename TLV-related macros so that they're friendly to user applications")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tlv.h      | 9 ++++++++-
 include/uapi/sound/tlv.h | 3 ---
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/sound/tlv.h b/include/sound/tlv.h
index 6e2e7735d20a..3677ebb928d5 100644
--- a/include/sound/tlv.h
+++ b/include/sound/tlv.h
@@ -46,8 +46,15 @@
 
 #define TLV_DB_RANGE_ITEM		SNDRV_CTL_TLVD_DB_RANGE_ITEM
 #define DECLARE_TLV_DB_RANGE		SNDRV_CTL_TLVD_DECLARE_DB_RANGE
-#define TLV_DB_RANGE_HEAD		SNDRV_CTL_TLVD_DB_RANGE_HEAD
 
 #define TLV_DB_GAIN_MUTE		SNDRV_CTL_TLVD_DB_GAIN_MUTE
 
+/*
+ * The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR.
+ * This is an old fasion and obsoleted by commit bf1d1c9b6179("ALSA: tlv: add
+ * DECLARE_TLV_DB_RANGE()").
+ */
+#define TLV_DB_RANGE_HEAD(num) \
+	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
+
 #endif /* __SOUND_TLV_H */
diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h
index f3c198f8bee7..b4df440c015b 100644
--- a/include/uapi/sound/tlv.h
+++ b/include/uapi/sound/tlv.h
@@ -94,9 +94,6 @@
 	unsigned int name[] = { \
 		SNDRV_CTL_TLVD_DB_RANGE_ITEM(__VA_ARGS__) \
 	}
-/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
-#define SNDRV_CTL_TLVD_DB_RANGE_HEAD(num) \
-	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
 
 #define SNDRV_CTL_TLVD_DB_GAIN_MUTE	-9999999
 
-- 
cgit v1.2.3


From 0f3cd9b3697708c86a825ae3cedabf7be6fd3e72 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Sep 2016 15:23:33 +0200
Subject: netfilter: nf_tables: add range expression

Inverse ranges != [a,b] are not currently possible because rules are
composites of && operations, and we need to express this:

	data < a || data > b

This patch adds a new range expression. Positive ranges can be already
through two cmp expressions:

	cmp(sreg, data, >=)
	cmp(sreg, data, <=)

This new range expression provides an alternative way to express this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h   |   3 +
 include/uapi/linux/netfilter/nf_tables.h |  29 +++++++
 net/netfilter/Makefile                   |   3 +-
 net/netfilter/nf_tables_core.c           |   7 +-
 net/netfilter/nft_range.c                | 138 +++++++++++++++++++++++++++++++
 5 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 net/netfilter/nft_range.c

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a9060dd99db7..00f4f6b1b1ba 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops;
 int nft_cmp_module_init(void);
 void nft_cmp_module_exit(void);
 
+int nft_range_module_init(void);
+void nft_range_module_exit(void);
+
 int nft_lookup_module_init(void);
 void nft_lookup_module_exit(void);
 
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 1cf41dd838b2..c6c4477c136b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -546,6 +546,35 @@ enum nft_cmp_attributes {
 };
 #define NFTA_CMP_MAX		(__NFTA_CMP_MAX - 1)
 
+/**
+ * enum nft_range_ops - nf_tables range operator
+ *
+ * @NFT_RANGE_EQ: equal
+ * @NFT_RANGE_NEQ: not equal
+ */
+enum nft_range_ops {
+	NFT_RANGE_EQ,
+	NFT_RANGE_NEQ,
+};
+
+/**
+ * enum nft_range_attributes - nf_tables range expression netlink attributes
+ *
+ * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes)
+ * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_range_attributes {
+	NFTA_RANGE_UNSPEC,
+	NFTA_RANGE_SREG,
+	NFTA_RANGE_OP,
+	NFTA_RANGE_FROM_DATA,
+	NFTA_RANGE_TO_DATA,
+	__NFTA_RANGE_MAX
+};
+#define NFTA_RANGE_MAX		(__NFTA_RANGE_MAX - 1)
+
 enum nft_lookup_flags {
 	NFT_LOOKUP_F_INV = (1 << 0),
 };
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 0c8581100ac6..c23c3c84416f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV)	+= nf_dup_netdev.o
 
 # nf_tables
 nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+nf_tables-objs += nft_lookup.o nft_dynset.o
 
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
 obj-$(CONFIG_NF_TABLES_INET)	+= nf_tables_inet.o
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 67259cefef06..7c94ce0080d5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -263,8 +263,13 @@ int __init nf_tables_core_module_init(void)
 	if (err < 0)
 		goto err7;
 
-	return 0;
+	err = nft_range_module_init();
+	if (err < 0)
+		goto err8;
 
+	return 0;
+err8:
+	nft_dynset_module_exit();
 err7:
 	nft_payload_module_exit();
 err6:
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
new file mode 100644
index 000000000000..c6d5358482d1
--- /dev/null
+++ b/net/netfilter/nft_range.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_range_expr {
+	struct nft_data		data_from;
+	struct nft_data		data_to;
+	enum nft_registers	sreg:8;
+	u8			len;
+	enum nft_range_ops	op:8;
+};
+
+static void nft_range_eval(const struct nft_expr *expr,
+			 struct nft_regs *regs,
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_range_expr *priv = nft_expr_priv(expr);
+	bool mismatch;
+	int d1, d2;
+
+	d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
+	d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
+	switch (priv->op) {
+	case NFT_RANGE_EQ:
+		mismatch = (d1 < 0 || d2 > 0);
+		break;
+	case NFT_RANGE_NEQ:
+		mismatch = (d1 >= 0 && d2 <= 0);
+		break;
+	}
+
+	if (mismatch)
+		regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
+	[NFTA_RANGE_SREG]		= { .type = NLA_U32 },
+	[NFTA_RANGE_OP]			= { .type = NLA_U32 },
+	[NFTA_RANGE_FROM_DATA]		= { .type = NLA_NESTED },
+	[NFTA_RANGE_TO_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_range_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc_from, desc_to;
+	int err;
+
+	err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+			    &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+			    &desc_to, tb[NFTA_RANGE_TO_DATA]);
+	if (err < 0)
+		goto err1;
+
+	if (desc_from.len != desc_to.len) {
+		err = -EINVAL;
+		goto err2;
+	}
+
+	priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
+	err = nft_validate_register_load(priv->sreg, desc_from.len);
+	if (err < 0)
+		goto err2;
+
+	priv->op  = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+	priv->len = desc_from.len;
+	return 0;
+err2:
+	nft_data_uninit(&priv->data_to, desc_to.type);
+err1:
+	nft_data_uninit(&priv->data_from, desc_from.type);
+	return err;
+}
+
+static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_range_expr *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from,
+			  NFT_DATA_VALUE, priv->len) < 0 ||
+	    nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_range_type;
+static const struct nft_expr_ops nft_range_ops = {
+	.type		= &nft_range_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
+	.eval		= nft_range_eval,
+	.init		= nft_range_init,
+	.dump		= nft_range_dump,
+};
+
+static struct nft_expr_type nft_range_type __read_mostly = {
+	.name		= "range",
+	.ops		= &nft_range_ops,
+	.policy		= nft_range_policy,
+	.maxattr	= NFTA_RANGE_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_range_module_init(void)
+{
+	return nft_register_expr(&nft_range_type);
+}
+
+void nft_range_module_exit(void)
+{
+	nft_unregister_expr(&nft_range_type);
+}
-- 
cgit v1.2.3


From ff107d27761ff4b644c82c209e004ec9c8fbbc22 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Sun, 25 Sep 2016 16:35:56 +0800
Subject: netfilter: nft_log: complete NFTA_LOG_FLAGS attr support

NFTA_LOG_FLAGS attribute is already supported, but the related
NF_LOG_XXX flags are not exposed to the userspace. So we cannot
explicitly enable log flags to log uid, tcp sequence, ip options
and so on, i.e. such rule "nft add rule filter output log uid"
is not supported yet.

So move NF_LOG_XXX macro definitions to the uapi/../nf_log.h. In
order to keep consistent with other modules, change NF_LOG_MASK to
refer to all supported log flags. On the other hand, add a new
NF_LOG_DEFAULT_MASK to refer to the original default log flags.

Finally, if user specify the unsupported log flags or NFTA_LOG_GROUP
and NFTA_LOG_FLAGS are set at the same time, report EINVAL to the
userspace.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_log.h        | 11 +++--------
 include/uapi/linux/netfilter/nf_log.h | 12 ++++++++++++
 net/bridge/netfilter/ebt_log.c        |  2 +-
 net/ipv4/netfilter/ip_tables.c        |  2 +-
 net/ipv4/netfilter/nf_log_arp.c       |  2 +-
 net/ipv4/netfilter/nf_log_ipv4.c      |  4 ++--
 net/ipv6/netfilter/ip6_tables.c       |  2 +-
 net/ipv6/netfilter/nf_log_ipv6.c      |  4 ++--
 net/netfilter/nf_tables_core.c        |  2 +-
 net/netfilter/nft_log.c               |  9 ++++++++-
 10 files changed, 32 insertions(+), 18 deletions(-)
 create mode 100644 include/uapi/linux/netfilter/nf_log.h

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index ee07dc8b0a7b..309cd267be4f 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -2,15 +2,10 @@
 #define _NF_LOG_H
 
 #include <linux/netfilter.h>
+#include <linux/netfilter/nf_log.h>
 
-/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
- * disappear once iptables is replaced with pkttables.  Please DO NOT use them
- * for any new code! */
-#define NF_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
-#define NF_LOG_TCPOPT		0x02	/* Log TCP options */
-#define NF_LOG_IPOPT		0x04	/* Log IP options */
-#define NF_LOG_UID		0x08	/* Log UID owning local socket */
-#define NF_LOG_MASK		0x0f
+/* Log tcp sequence, tcp options, ip options and uid owning local socket */
+#define NF_LOG_DEFAULT_MASK	0x0f
 
 /* This flag indicates that copy_len field in nf_loginfo is set */
 #define NF_LOG_F_COPY_LEN	0x1
diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h
new file mode 100644
index 000000000000..8be21e02387d
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_log.h
@@ -0,0 +1,12 @@
+#ifndef _NETFILTER_NF_LOG_H
+#define _NETFILTER_NF_LOG_H
+
+#define NF_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
+#define NF_LOG_TCPOPT		0x02	/* Log TCP options */
+#define NF_LOG_IPOPT		0x04	/* Log IP options */
+#define NF_LOG_UID		0x08	/* Log UID owning local socket */
+#define NF_LOG_NFLOG		0x10	/* Unsupported, don't reuse */
+#define NF_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define NF_LOG_MASK		0x2f
+
+#endif /* _NETFILTER_NF_LOG_H */
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 152300d164ac..9a11086ba6ff 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
 	if (loginfo->type == NF_LOG_TYPE_LOG)
 		bitmask = loginfo->u.log.logflags;
 	else
-		bitmask = NF_LOG_MASK;
+		bitmask = NF_LOG_DEFAULT_MASK;
 
 	if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
 	   htons(ETH_P_IP)) {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f993545a3373..7c00ce90adb8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = {
 	.u = {
 		.log = {
 			.level = 4,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 8945c2653814..b24795e2ee6d 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = {
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 20f225593a8b..5b571e1b5f15 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = {
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
@@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 	else
-		logflags = NF_LOG_MASK;
+		logflags = NF_LOG_DEFAULT_MASK;
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 552fac2f390a..55aacea24396 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = {
 	.u = {
 		.log = {
 			.level = LOGLEVEL_WARNING,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index c1bcf699a23d..f6aee2895fee 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = {
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
@@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 	else
-		logflags = NF_LOG_MASK;
+		logflags = NF_LOG_DEFAULT_MASK;
 
 	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
 	if (ih == NULL) {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 7c94ce0080d5..0dd5c695482f 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = {
 	.u = {
 		.log = {
 			.level = LOGLEVEL_WARNING,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 	        },
 	},
 };
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 24a73bb26e94..1b01404bb33f 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
 	if (tb[NFTA_LOG_LEVEL] != NULL &&
 	    tb[NFTA_LOG_GROUP] != NULL)
 		return -EINVAL;
-	if (tb[NFTA_LOG_GROUP] != NULL)
+	if (tb[NFTA_LOG_GROUP] != NULL) {
 		li->type = NF_LOG_TYPE_ULOG;
+		if (tb[NFTA_LOG_FLAGS] != NULL)
+			return -EINVAL;
+	}
 
 	nla = tb[NFTA_LOG_PREFIX];
 	if (nla != NULL) {
@@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx,
 		if (tb[NFTA_LOG_FLAGS] != NULL) {
 			li->u.log.logflags =
 				ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+			if (li->u.log.logflags & ~NF_LOG_MASK) {
+				err = -EINVAL;
+				goto err1;
+			}
 		}
 		break;
 	case NF_LOG_TYPE_ULOG:
-- 
cgit v1.2.3


From 8564e38206de2ff005a27c8d7c2ce3869a44f0dd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 Sep 2016 09:44:44 +0200
Subject: cfg80211: add checks for beacon rate, extend to mesh

The previous commit added support for specifying the beacon rate
for AP mode. Add features checks to this, and extend it to also
support the rate configuration for mesh networks. For IBSS it's
not as simple due to joining etc., so that's not yet supported.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  4 +++-
 include/uapi/linux/nl80211.h | 17 +++++++++++++++-
 net/wireless/nl80211.c       | 46 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 54 insertions(+), 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e0949c8bc2d1..ed37304fa09d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -712,7 +712,7 @@ struct cfg80211_bitrate_mask {
  *	MAC address based access control
  * @pbss: If set, start as a PCP instead of AP. Relevant for DMG
  *	networks.
- * @beacon_rate: masks for setting user configured beacon tx rate.
+ * @beacon_rate: bitrate to be used for beacons
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -1365,6 +1365,7 @@ struct mesh_config {
  * @beacon_interval: beacon interval to use
  * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a]
  * @basic_rates: basic rates to use when creating the mesh
+ * @beacon_rate: bitrate to be used for beacons
  *
  * These parameters are fixed when the mesh is created.
  */
@@ -1385,6 +1386,7 @@ struct mesh_setup {
 	u16 beacon_interval;
 	int mcast_rate[NUM_NL80211_BANDS];
 	u32 basic_rates;
+	struct cfg80211_bitrate_mask beacon_rate;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 220694151434..ec10d1b2838f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1343,7 +1343,13 @@ enum nl80211_commands {
  *	enum nl80211_band value is used as the index (nla_type() of the nested
  *	data. If a band is not included, it will be configured to allow all
  *	rates based on negotiated supported rates information. This attribute
- *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK.
+ *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP,
+ *	and joining mesh networks (not IBSS yet). In the later case, it must
+ *	specify just a single bitrate, which is to be used for the beacon.
+ *	The driver must also specify support for this with the extended
+ *	features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ *	NL80211_EXT_FEATURE_BEACON_RATE_HT and
+ *	NL80211_EXT_FEATURE_BEACON_RATE_VHT.
  *
  * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
  *	at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME.
@@ -4551,6 +4557,12 @@ enum nl80211_feature_flags {
  *	(if available).
  * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of
  *	channel dwell time.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate
+ *	configuration (AP/mesh), supporting a legacy (non HT/VHT) rate.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate
+ *	configuration (AP/mesh) with HT rates.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate
+ *	configuration (AP/mesh) with VHT rates.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4562,6 +4574,9 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_SCAN_START_TIME,
 	NL80211_EXT_FEATURE_BSS_PARENT_TSF,
 	NL80211_EXT_FEATURE_SET_SCAN_DWELL,
+	NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+	NL80211_EXT_FEATURE_BEACON_RATE_HT,
+	NL80211_EXT_FEATURE_BEACON_RATE_VHT,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a10484da60c0..b8441e60b0f6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3569,13 +3569,12 @@ out:
 	return 0;
 }
 
-static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params)
+static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
+				   enum nl80211_band band,
+				   struct cfg80211_bitrate_mask *beacon_rate)
 {
-	u32 rate, count_ht, count_vht, i;
-	enum nl80211_band band;
-
-	band = params->chandef.chan->band;
-	rate = params->beacon_rate.control[band].legacy;
+	u32 count_ht, count_vht, i;
+	u32 rate = beacon_rate->control[band].legacy;
 
 	/* Allow only one rate */
 	if (hweight32(rate) > 1)
@@ -3583,9 +3582,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params)
 
 	count_ht = 0;
 	for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) {
-		if (hweight8(params->beacon_rate.control[band].ht_mcs[i]) > 1) {
+		if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) {
 			return -EINVAL;
-		} else if (params->beacon_rate.control[band].ht_mcs[i]) {
+		} else if (beacon_rate->control[band].ht_mcs[i]) {
 			count_ht++;
 			if (count_ht > 1)
 				return -EINVAL;
@@ -3596,9 +3595,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params)
 
 	count_vht = 0;
 	for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
-		if (hweight16(params->beacon_rate.control[band].vht_mcs[i]) > 1) {
+		if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) {
 			return -EINVAL;
-		} else if (params->beacon_rate.control[band].vht_mcs[i]) {
+		} else if (beacon_rate->control[band].vht_mcs[i]) {
 			count_vht++;
 			if (count_vht > 1)
 				return -EINVAL;
@@ -3610,6 +3609,19 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params)
 	if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht))
 		return -EINVAL;
 
+	if (rate &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_LEGACY))
+		return -EINVAL;
+	if (count_ht &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_HT))
+		return -EINVAL;
+	if (count_vht &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_VHT))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -3847,7 +3859,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 		if (err)
 			return err;
 
-		err = validate_beacon_tx_rate(&params);
+		err = validate_beacon_tx_rate(rdev, params.chandef.chan->band,
+					      &params.beacon_rate);
 		if (err)
 			return err;
 	}
@@ -9406,6 +9419,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 			return err;
 	}
 
+	if (info->attrs[NL80211_ATTR_TX_RATES]) {
+		err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate);
+		if (err)
+			return err;
+
+		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
+					      &setup.beacon_rate);
+		if (err)
+			return err;
+	}
+
 	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }
 
-- 
cgit v1.2.3


From b4c8eb0379c8d3300a54210edd2235fd1e81a8a6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 16 Sep 2016 16:28:26 -0400
Subject: nfs: add a new NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK constant

As defined in RFC 5661, section 18.16.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/uapi/linux/nfs4.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 2b871e0858d9..4ae62796bfde 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -39,8 +39,9 @@
 #define NFS4_FH_VOL_MIGRATION		0x0004
 #define NFS4_FH_VOL_RENAME		0x0008
 
-#define NFS4_OPEN_RESULT_CONFIRM 0x0002
-#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
+#define NFS4_OPEN_RESULT_CONFIRM		0x0002
+#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX		0x0004
+#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK	0x0020
 
 #define NFS4_SHARE_ACCESS_MASK	0x000F
 #define NFS4_SHARE_ACCESS_READ	0x0001
-- 
cgit v1.2.3


From 8e5470c9839caff94fe334e67ff7e7ace587282a Mon Sep 17 00:00:00 2001
From: Thor Thayer <tthayer@opensource.altera.com>
Date: Thu, 22 Sep 2016 14:56:16 -0500
Subject: serial: 8250: Set Altera 16550 TX FIFO Threshold

The Altera 16550 soft IP UART requires 2 additional registers for
TX FIFO threshold support. These 2 registers enable the TX FIFO
Low Watermark and set the TX FIFO Low Watermark.
Set the TX FIFO threshold to the FIFO size - tx_loadsz.

Signed-off-by: Thor Thayer <tthayer@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 43 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_reg.h     |  8 +++++++
 2 files changed, 51 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 28d29fac1973..1bfb6fdbaa20 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1870,6 +1870,30 @@ static int exar_handle_irq(struct uart_port *port)
 	return ret;
 }
 
+/*
+ * Newer 16550 compatible parts such as the SC16C650 & Altera 16550 Soft IP
+ * have a programmable TX threshold that triggers the THRE interrupt in
+ * the IIR register. In this case, the THRE interrupt indicates the FIFO
+ * has space available. Load it up with tx_loadsz bytes.
+ */
+static int serial8250_tx_threshold_handle_irq(struct uart_port *port)
+{
+	unsigned long flags;
+	unsigned int iir = serial_port_in(port, UART_IIR);
+
+	/* TX Threshold IRQ triggered so load up FIFO */
+	if ((iir & UART_IIR_ID) == UART_IIR_THRI) {
+		struct uart_8250_port *up = up_to_u8250p(port);
+
+		spin_lock_irqsave(&port->lock, flags);
+		serial8250_tx_chars(up);
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	iir = serial_port_in(port, UART_IIR);
+	return serial8250_handle_irq(port, iir);
+}
+
 static unsigned int serial8250_tx_empty(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -2159,6 +2183,25 @@ int serial8250_do_startup(struct uart_port *port)
 		serial_port_out(port, UART_LCR, 0);
 	}
 
+	/*
+	 * For the Altera 16550 variants, set TX threshold trigger level.
+	 */
+	if (((port->type == PORT_ALTR_16550_F32) ||
+	     (port->type == PORT_ALTR_16550_F64) ||
+	     (port->type == PORT_ALTR_16550_F128)) && (port->fifosize > 1)) {
+		/* Bounds checking of TX threshold (valid 0 to fifosize-2) */
+		if ((up->tx_loadsz < 2) || (up->tx_loadsz > port->fifosize)) {
+			pr_err("ttyS%d TX FIFO Threshold errors, skipping\n",
+			       serial_index(port));
+		} else {
+			serial_port_out(port, UART_ALTR_AFR,
+					UART_ALTR_EN_TXFIFO_LW);
+			serial_port_out(port, UART_ALTR_TX_LOW,
+					port->fifosize - up->tx_loadsz);
+			port->handle_irq = serial8250_tx_threshold_handle_irq;
+		}
+	}
+
 	if (port->irq) {
 		unsigned char iir1;
 		/*
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 1e5ac4e776da..b4c04842a8c0 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -376,5 +376,13 @@
 #define UART_EXAR_TXTRG		0x0a	/* Tx FIFO trigger level write-only */
 #define UART_EXAR_RXTRG		0x0b	/* Rx FIFO trigger level write-only */
 
+/*
+ * These are definitions for the Altera ALTR_16550_F32/F64/F128
+ * Normalized from 0x100 to 0x40 because of shift by 2 (32 bit regs).
+ */
+#define UART_ALTR_AFR		0x40	/* Additional Features Register */
+#define UART_ALTR_EN_TXFIFO_LW	0x01	/* Enable the TX FIFO Low Watermark */
+#define UART_ALTR_TX_LOW	0x41	/* Tx FIFO Low Watermark */
+
 #endif /* _LINUX_SERIAL_REG_H */
 
-- 
cgit v1.2.3


From bc8bcf3b150a29cd8d3f17a1aeb19a804ea683fa Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 27 Sep 2016 13:03:23 +0200
Subject: posix_acl: uapi header split

Export the base definitions and the xattr representation of POSIX ACLs
to user space.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/posix_acl.h            | 22 +-------------------
 include/linux/posix_acl_xattr.h      | 18 +----------------
 include/uapi/linux/Kbuild            |  2 ++
 include/uapi/linux/posix_acl.h       | 39 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/posix_acl_xattr.h | 38 +++++++++++++++++++++++++++++++++++
 5 files changed, 81 insertions(+), 38 deletions(-)
 create mode 100644 include/uapi/linux/posix_acl.h
 create mode 100644 include/uapi/linux/posix_acl_xattr.h

(limited to 'include/uapi')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index d5d3d741f028..5433eea8e97c 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -11,27 +11,7 @@
 #include <linux/bug.h>
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
-
-#define ACL_UNDEFINED_ID	(-1)
-
-/* a_type field in acl_user_posix_entry_t */
-#define ACL_TYPE_ACCESS		(0x8000)
-#define ACL_TYPE_DEFAULT	(0x4000)
-
-/* e_tag entry in struct posix_acl_entry */
-#define ACL_USER_OBJ		(0x01)
-#define ACL_USER		(0x02)
-#define ACL_GROUP_OBJ		(0x04)
-#define ACL_GROUP		(0x08)
-#define ACL_MASK		(0x10)
-#define ACL_OTHER		(0x20)
-
-/* permissions in the e_perm field */
-#define ACL_READ		(0x04)
-#define ACL_WRITE		(0x02)
-#define ACL_EXECUTE		(0x01)
-//#define ACL_ADD		(0x08)
-//#define ACL_DELETE		(0x10)
+#include <uapi/linux/posix_acl.h>
 
 struct posix_acl_entry {
 	short			e_tag;
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index d23d36842322..8b867e3bf3aa 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -10,25 +10,9 @@
 #define _POSIX_ACL_XATTR_H
 
 #include <uapi/linux/xattr.h>
+#include <uapi/linux/posix_acl_xattr.h>
 #include <linux/posix_acl.h>
 
-/* Supported ACL a_version fields */
-#define POSIX_ACL_XATTR_VERSION	0x0002
-
-/* An undefined entry e_id value */
-#define ACL_UNDEFINED_ID	(-1)
-
-struct posix_acl_xattr_entry {
-	__le16			e_tag;
-	__le16			e_perm;
-	__le32			e_id;
-};
-
-struct posix_acl_xattr_header {
-	__le32			a_version;
-};
-
-
 static inline size_t
 posix_acl_xattr_size(int count)
 {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..e266739168ec 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -335,6 +335,8 @@ header-y += pkt_cls.h
 header-y += pkt_sched.h
 header-y += pmu.h
 header-y += poll.h
+header-y += posix_acl.h
+header-y += posix_acl_xattr.h
 header-y += posix_types.h
 header-y += ppdev.h
 header-y += ppp-comp.h
diff --git a/include/uapi/linux/posix_acl.h b/include/uapi/linux/posix_acl.h
new file mode 100644
index 000000000000..1037cb19aa17
--- /dev/null
+++ b/include/uapi/linux/posix_acl.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2002 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ */
+
+#ifndef __UAPI_POSIX_ACL_H
+#define __UAPI_POSIX_ACL_H
+
+#define ACL_UNDEFINED_ID	(-1)
+
+/* a_type field in acl_user_posix_entry_t */
+#define ACL_TYPE_ACCESS		(0x8000)
+#define ACL_TYPE_DEFAULT	(0x4000)
+
+/* e_tag entry in struct posix_acl_entry */
+#define ACL_USER_OBJ		(0x01)
+#define ACL_USER		(0x02)
+#define ACL_GROUP_OBJ		(0x04)
+#define ACL_GROUP		(0x08)
+#define ACL_MASK		(0x10)
+#define ACL_OTHER		(0x20)
+
+/* permissions in the e_perm field */
+#define ACL_READ		(0x04)
+#define ACL_WRITE		(0x02)
+#define ACL_EXECUTE		(0x01)
+
+#endif  /* __UAPI_POSIX_ACL_H */
diff --git a/include/uapi/linux/posix_acl_xattr.h b/include/uapi/linux/posix_acl_xattr.h
new file mode 100644
index 000000000000..8b579844109b
--- /dev/null
+++ b/include/uapi/linux/posix_acl_xattr.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2002 Andreas Gruenbacher <a.gruenbacher@computer.org>
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ */
+
+#ifndef __UAPI_POSIX_ACL_XATTR_H
+#define __UAPI_POSIX_ACL_XATTR_H
+
+#include <linux/types.h>
+
+/* Supported ACL a_version fields */
+#define POSIX_ACL_XATTR_VERSION	0x0002
+
+/* An undefined entry e_id value */
+#define ACL_UNDEFINED_ID	(-1)
+
+struct posix_acl_xattr_entry {
+	__le16			e_tag;
+	__le16			e_perm;
+	__le32			e_id;
+};
+
+struct posix_acl_xattr_header {
+	__le32			a_version;
+};
+
+#endif	/* __UAPI_POSIX_ACL_XATTR_H */
-- 
cgit v1.2.3


From 7ff89ac608d9e856cae6fa651553fa0709bf9c50 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Thu, 18 Aug 2016 12:05:25 -0400
Subject: audit: add exclude filter extension to feature bitmap

Add to the audit feature bitmap to indicate availability of the
extension of the exclude filter to include PID, UID, AUID, GID, SUBJ_*.

RFE: add additional fields for use in audit filter exclude rules
https://github.com/linux-audit/audit-kernel/issues/5

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index d820aa979620..76c5e7eb1189 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -327,9 +327,11 @@ enum {
 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
 #define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH	0x00000004
+#define AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND	0x00000008
 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
 				  AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
-				  AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH)
+				  AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH | \
+				  AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND)
 
 /* deprecated: AUDIT_VERSION_* */
 #define AUDIT_VERSION_LATEST 		AUDIT_FEATURE_BITMAP_ALL
-- 
cgit v1.2.3


From 54f9c4d0778b3f9ab791b1b7eb1a5d2809f02f50 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Tue, 20 Sep 2016 09:01:38 +0200
Subject: ipmi: add an Aspeed BT IPMI BMC driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a simple device driver to expose the iBT interface on
Aspeed SOCs (AST2400 and AST2500) as a character device. Such SOCs are
commonly used as BMCs (BaseBoard Management Controllers) and this
driver implements the BMC side of the BT interface.

The BT (Block Transfer) interface is used to perform in-band IPMI
communication between a host and its BMC. Entire messages are buffered
before sending a notification to the other end, host or BMC, that
there is data to be read. Usually, the host emits requests and the BMC
responses but the specification provides a mean for the BMC to send
SMS Attention (BMC-to-Host attention or System Management Software
attention) messages.

For this purpose, the driver introduces a specific ioctl on the
device: 'BT_BMC_IOCTL_SMS_ATN' that can be used by the system running
on the BMC to signal the host of such an event.

The device name defaults to '/dev/ipmi-bt-host'

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Joel Stanley <joel@jms.id.au>
[clg: - checkpatch fixes
      - added a devicetree binding documentation
      - replace 'bt_host' by 'bt_bmc' to reflect that the driver is
        the BMC side of the IPMI BT interface
      - renamed the device to 'ipmi-bt-host'
      - introduced a temporary buffer to copy_{to,from}_user
      - used platform_get_irq()
      - moved the driver under drivers/char/ipmi/ but kept it as a misc
        device
      - changed the compatible cell to "aspeed,ast2400-bt-bmc"
]
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
[clg: - checkpatch --strict fixes
      - removed the use of devm_iounmap, devm_kfree in cleanup paths
      - introduced an atomic-t to limit opens to 1
      - introduced a mutex to protect write/read operations]
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 Documentation/devicetree/bindings/ipmi.txt         |  25 -
 .../bindings/ipmi/aspeed,ast2400-bt-bmc.txt        |  23 +
 .../devicetree/bindings/ipmi/ipmi-smic.txt         |  25 +
 drivers/Makefile                                   |   2 +-
 drivers/char/ipmi/Kconfig                          |   7 +
 drivers/char/ipmi/Makefile                         |   1 +
 drivers/char/ipmi/bt-bmc.c                         | 510 +++++++++++++++++++++
 include/uapi/linux/Kbuild                          |   1 +
 include/uapi/linux/bt-bmc.h                        |  18 +
 9 files changed, 586 insertions(+), 26 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/ipmi.txt
 create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt
 create mode 100644 Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
 create mode 100644 drivers/char/ipmi/bt-bmc.c
 create mode 100644 include/uapi/linux/bt-bmc.h

(limited to 'include/uapi')

diff --git a/Documentation/devicetree/bindings/ipmi.txt b/Documentation/devicetree/bindings/ipmi.txt
deleted file mode 100644
index d5f1a877ed3e..000000000000
--- a/Documentation/devicetree/bindings/ipmi.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-IPMI device
-
-Required properties:
-- compatible: should be one of ipmi-kcs, ipmi-smic, or ipmi-bt
-- device_type: should be ipmi
-- reg: Address and length of the register set for the device
-
-Optional properties:
-- interrupts: The interrupt for the device.  Without this the interface
-	is polled.
-- reg-size - The size of the register.  Defaults to 1
-- reg-spacing - The number of bytes between register starts.  Defaults to 1
-- reg-shift - The amount to shift the registers to the right to get the data
-	into bit zero.
-
-Example:
-
-smic@fff3a000 {
-	compatible = "ipmi-smic";
-	device_type = "ipmi";
-	reg = <0xfff3a000 0x1000>;
-	interrupts = <0 24 4>;
-	reg-size = <4>;
-	reg-spacing = <4>;
-};
diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt
new file mode 100644
index 000000000000..fbbacd958240
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt
@@ -0,0 +1,23 @@
+* Aspeed BT (Block Transfer) IPMI interface
+
+The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
+(BaseBoard Management Controllers) and the BT interface can be used to
+perform in-band IPMI communication with their host.
+
+Required properties:
+
+- compatible : should be "aspeed,ast2400-bt-bmc"
+- reg: physical address and size of the registers
+
+Optional properties:
+
+- interrupts: interrupt generated by the BT interface. without an
+  interrupt, the driver will operate in poll mode.
+
+Example:
+
+	ibt@1e789140 {
+		compatible = "aspeed,ast2400-bt-bmc";
+		reg = <0x1e789140 0x18>;
+		interrupts = <8>;
+	};
diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt b/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
new file mode 100644
index 000000000000..d5f1a877ed3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
@@ -0,0 +1,25 @@
+IPMI device
+
+Required properties:
+- compatible: should be one of ipmi-kcs, ipmi-smic, or ipmi-bt
+- device_type: should be ipmi
+- reg: Address and length of the register set for the device
+
+Optional properties:
+- interrupts: The interrupt for the device.  Without this the interface
+	is polled.
+- reg-size - The size of the register.  Defaults to 1
+- reg-spacing - The number of bytes between register starts.  Defaults to 1
+- reg-shift - The amount to shift the registers to the right to get the data
+	into bit zero.
+
+Example:
+
+smic@fff3a000 {
+	compatible = "ipmi-smic";
+	device_type = "ipmi";
+	reg = <0xfff3a000 0x1000>;
+	interrupts = <0 24 4>;
+	reg-size = <4>;
+	reg-spacing = <4>;
+};
diff --git a/drivers/Makefile b/drivers/Makefile
index 53abb4a5f736..5a9e7b6b7928 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -21,7 +21,7 @@ obj-y				+= video/
 obj-y				+= idle/
 
 # IPMI must come before ACPI in order to provide IPMI opregion support
-obj-$(CONFIG_IPMI_HANDLER)	+= char/ipmi/
+obj-y				+= char/ipmi/
 
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_SFI)		+= sfi/
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 5a9350b1069a..2c234e3e7513 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -76,3 +76,10 @@ config IPMI_POWEROFF
 	 the IPMI management controller is capable of this.
 
 endif # IPMI_HANDLER
+
+config ASPEED_BT_IPMI_BMC
+	tristate "BT IPMI bmc driver"
+	help
+	  Provides a driver for the BT (Block Transfer) IPMI interface
+	  found on Aspeed SOCs (AST2400 and AST2500). The driver
+	  implements the BMC side of the BT interface.
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index f3ffde1f5f1f..0d98cd91def1 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
+obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
new file mode 100644
index 000000000000..2e880bf0be26
--- /dev/null
+++ b/drivers/char/ipmi/bt-bmc.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2015-2016, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bt-bmc.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+/*
+ * This is a BMC device used to communicate to the host
+ */
+#define DEVICE_NAME	"ipmi-bt-host"
+
+#define BT_IO_BASE	0xe4
+#define BT_IRQ		10
+
+#define BT_CR0		0x0
+#define   BT_CR0_IO_BASE		16
+#define   BT_CR0_IRQ			12
+#define   BT_CR0_EN_CLR_SLV_RDP		0x8
+#define   BT_CR0_EN_CLR_SLV_WRP		0x4
+#define   BT_CR0_ENABLE_IBT		0x1
+#define BT_CR1		0x4
+#define   BT_CR1_IRQ_H2B	0x01
+#define   BT_CR1_IRQ_HBUSY	0x40
+#define BT_CR2		0x8
+#define   BT_CR2_IRQ_H2B	0x01
+#define   BT_CR2_IRQ_HBUSY	0x40
+#define BT_CR3		0xc
+#define BT_CTRL		0x10
+#define   BT_CTRL_B_BUSY		0x80
+#define   BT_CTRL_H_BUSY		0x40
+#define   BT_CTRL_OEM0			0x20
+#define   BT_CTRL_SMS_ATN		0x10
+#define   BT_CTRL_B2H_ATN		0x08
+#define   BT_CTRL_H2B_ATN		0x04
+#define   BT_CTRL_CLR_RD_PTR		0x02
+#define   BT_CTRL_CLR_WR_PTR		0x01
+#define BT_BMC2HOST	0x14
+#define BT_INTMASK	0x18
+#define   BT_INTMASK_B2H_IRQEN		0x01
+#define   BT_INTMASK_B2H_IRQ		0x02
+#define   BT_INTMASK_BMC_HWRST		0x80
+
+#define BT_BMC_BUFFER_SIZE 256
+
+struct bt_bmc {
+	struct device		dev;
+	struct miscdevice	miscdev;
+	void __iomem		*base;
+	int			irq;
+	wait_queue_head_t	queue;
+	struct timer_list	poll_timer;
+	struct mutex		mutex;
+};
+
+static atomic_t open_count = ATOMIC_INIT(0);
+
+static u8 bt_inb(struct bt_bmc *bt_bmc, int reg)
+{
+	return ioread8(bt_bmc->base + reg);
+}
+
+static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg)
+{
+	iowrite8(data, bt_bmc->base + reg);
+}
+
+static void clr_rd_ptr(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_CLR_RD_PTR, BT_CTRL);
+}
+
+static void clr_wr_ptr(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_CLR_WR_PTR, BT_CTRL);
+}
+
+static void clr_h2b_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_H2B_ATN, BT_CTRL);
+}
+
+static void set_b_busy(struct bt_bmc *bt_bmc)
+{
+	if (!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY))
+		bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+static void clr_b_busy(struct bt_bmc *bt_bmc)
+{
+	if (bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY)
+		bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+static void set_b2h_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_B2H_ATN, BT_CTRL);
+}
+
+static u8 bt_read(struct bt_bmc *bt_bmc)
+{
+	return bt_inb(bt_bmc, BT_BMC2HOST);
+}
+
+static ssize_t bt_readn(struct bt_bmc *bt_bmc, u8 *buf, size_t n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		buf[i] = bt_read(bt_bmc);
+	return n;
+}
+
+static void bt_write(struct bt_bmc *bt_bmc, u8 c)
+{
+	bt_outb(bt_bmc, c, BT_BMC2HOST);
+}
+
+static ssize_t bt_writen(struct bt_bmc *bt_bmc, u8 *buf, size_t n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		bt_write(bt_bmc, buf[i]);
+	return n;
+}
+
+static void set_sms_atn(struct bt_bmc *bt_bmc)
+{
+	bt_outb(bt_bmc, BT_CTRL_SMS_ATN, BT_CTRL);
+}
+
+static struct bt_bmc *file_bt_bmc(struct file *file)
+{
+	return container_of(file->private_data, struct bt_bmc, miscdev);
+}
+
+static int bt_bmc_open(struct inode *inode, struct file *file)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	if (atomic_inc_return(&open_count) == 1) {
+		clr_b_busy(bt_bmc);
+		return 0;
+	}
+
+	atomic_dec(&open_count);
+	return -EBUSY;
+}
+
+/*
+ * The BT (Block Transfer) interface means that entire messages are
+ * buffered by the host before a notification is sent to the BMC that
+ * there is data to be read. The first byte is the length and the
+ * message data follows. The read operation just tries to capture the
+ * whole before returning it to userspace.
+ *
+ * BT Message format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5:N
+ *    Length  NetFn/LUN  Seq     Cmd     Data
+ *
+ */
+static ssize_t bt_bmc_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	u8 len;
+	int len_byte = 1;
+	u8 kbuffer[BT_BMC_BUFFER_SIZE];
+	ssize_t ret = 0;
+	ssize_t nread;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	WARN_ON(*ppos);
+
+	if (wait_event_interruptible(bt_bmc->queue,
+				     bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))
+		return -ERESTARTSYS;
+
+	mutex_lock(&bt_bmc->mutex);
+
+	if (unlikely(!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	set_b_busy(bt_bmc);
+	clr_h2b_atn(bt_bmc);
+	clr_rd_ptr(bt_bmc);
+
+	/*
+	 * The BT frames start with the message length, which does not
+	 * include the length byte.
+	 */
+	kbuffer[0] = bt_read(bt_bmc);
+	len = kbuffer[0];
+
+	/* We pass the length back to userspace as well */
+	if (len + 1 > count)
+		len = count - 1;
+
+	while (len) {
+		nread = min_t(ssize_t, len, sizeof(kbuffer) - len_byte);
+
+		bt_readn(bt_bmc, kbuffer + len_byte, nread);
+
+		if (copy_to_user(buf, kbuffer, nread + len_byte)) {
+			ret = -EFAULT;
+			break;
+		}
+		len -= nread;
+		buf += nread + len_byte;
+		ret += nread + len_byte;
+		len_byte = 0;
+	}
+
+	clr_b_busy(bt_bmc);
+
+out_unlock:
+	mutex_unlock(&bt_bmc->mutex);
+	return ret;
+}
+
+/*
+ * BT Message response format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5  Byte 6:N
+ *    Length  NetFn/LUN  Seq     Cmd     Code    Data
+ */
+static ssize_t bt_bmc_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	u8 kbuffer[BT_BMC_BUFFER_SIZE];
+	ssize_t ret = 0;
+	ssize_t nwritten;
+
+	/*
+	 * send a minimum response size
+	 */
+	if (count < 5)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, buf, count))
+		return -EFAULT;
+
+	WARN_ON(*ppos);
+
+	/*
+	 * There's no interrupt for clearing bmc busy so we have to
+	 * poll
+	 */
+	if (wait_event_interruptible(bt_bmc->queue,
+				     !(bt_inb(bt_bmc, BT_CTRL) &
+				       (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))))
+		return -ERESTARTSYS;
+
+	mutex_lock(&bt_bmc->mutex);
+
+	if (unlikely(bt_inb(bt_bmc, BT_CTRL) &
+		     (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))) {
+		ret = -EIO;
+		goto out_unlock;
+	}
+
+	clr_wr_ptr(bt_bmc);
+
+	while (count) {
+		nwritten = min_t(ssize_t, count, sizeof(kbuffer));
+		if (copy_from_user(&kbuffer, buf, nwritten)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		bt_writen(bt_bmc, kbuffer, nwritten);
+
+		count -= nwritten;
+		buf += nwritten;
+		ret += nwritten;
+	}
+
+	set_b2h_atn(bt_bmc);
+
+out_unlock:
+	mutex_unlock(&bt_bmc->mutex);
+	return ret;
+}
+
+static long bt_bmc_ioctl(struct file *file, unsigned int cmd,
+			 unsigned long param)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	switch (cmd) {
+	case BT_BMC_IOCTL_SMS_ATN:
+		set_sms_atn(bt_bmc);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int bt_bmc_release(struct inode *inode, struct file *file)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+	atomic_dec(&open_count);
+	set_b_busy(bt_bmc);
+	return 0;
+}
+
+static unsigned int bt_bmc_poll(struct file *file, poll_table *wait)
+{
+	struct bt_bmc *bt_bmc = file_bt_bmc(file);
+	unsigned int mask = 0;
+	u8 ctrl;
+
+	poll_wait(file, &bt_bmc->queue, wait);
+
+	ctrl = bt_inb(bt_bmc, BT_CTRL);
+
+	if (ctrl & BT_CTRL_H2B_ATN)
+		mask |= POLLIN;
+
+	if (!(ctrl & (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN)))
+		mask |= POLLOUT;
+
+	return mask;
+}
+
+static const struct file_operations bt_bmc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= bt_bmc_open,
+	.read		= bt_bmc_read,
+	.write		= bt_bmc_write,
+	.release	= bt_bmc_release,
+	.poll		= bt_bmc_poll,
+	.unlocked_ioctl	= bt_bmc_ioctl,
+};
+
+static void poll_timer(unsigned long data)
+{
+	struct bt_bmc *bt_bmc = (void *)data;
+
+	bt_bmc->poll_timer.expires += msecs_to_jiffies(500);
+	wake_up(&bt_bmc->queue);
+	add_timer(&bt_bmc->poll_timer);
+}
+
+static irqreturn_t bt_bmc_irq(int irq, void *arg)
+{
+	struct bt_bmc *bt_bmc = arg;
+	u32 reg;
+
+	reg = ioread32(bt_bmc->base + BT_CR2);
+	reg &= BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY;
+	if (!reg)
+		return IRQ_NONE;
+
+	/* ack pending IRQs */
+	iowrite32(reg, bt_bmc->base + BT_CR2);
+
+	wake_up(&bt_bmc->queue);
+	return IRQ_HANDLED;
+}
+
+static int bt_bmc_config_irq(struct bt_bmc *bt_bmc,
+			     struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	u32 reg;
+	int rc;
+
+	bt_bmc->irq = platform_get_irq(pdev, 0);
+	if (!bt_bmc->irq)
+		return -ENODEV;
+
+	rc = devm_request_irq(dev, bt_bmc->irq, bt_bmc_irq, IRQF_SHARED,
+			      DEVICE_NAME, bt_bmc);
+	if (rc < 0) {
+		dev_warn(dev, "Unable to request IRQ %d\n", bt_bmc->irq);
+		bt_bmc->irq = 0;
+		return rc;
+	}
+
+	/*
+	 * Configure IRQs on the bmc clearing the H2B and HBUSY bits;
+	 * H2B will be asserted when the bmc has data for us; HBUSY
+	 * will be cleared (along with B2H) when we can write the next
+	 * message to the BT buffer
+	 */
+	reg = ioread32(bt_bmc->base + BT_CR1);
+	reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY;
+	iowrite32(reg, bt_bmc->base + BT_CR1);
+
+	return 0;
+}
+
+static int bt_bmc_probe(struct platform_device *pdev)
+{
+	struct bt_bmc *bt_bmc;
+	struct device *dev;
+	struct resource *res;
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	dev = &pdev->dev;
+	dev_info(dev, "Found bt bmc device\n");
+
+	bt_bmc = devm_kzalloc(dev, sizeof(*bt_bmc), GFP_KERNEL);
+	if (!bt_bmc)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, bt_bmc);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Unable to find resources\n");
+		return -ENXIO;
+	}
+
+	bt_bmc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (!bt_bmc->base)
+		return -ENOMEM;
+
+	mutex_init(&bt_bmc->mutex);
+	init_waitqueue_head(&bt_bmc->queue);
+
+	bt_bmc->miscdev.minor	= MISC_DYNAMIC_MINOR,
+		bt_bmc->miscdev.name	= DEVICE_NAME,
+		bt_bmc->miscdev.fops	= &bt_bmc_fops,
+		bt_bmc->miscdev.parent = dev;
+	rc = misc_register(&bt_bmc->miscdev);
+	if (rc) {
+		dev_err(dev, "Unable to register misc device\n");
+		return rc;
+	}
+
+	bt_bmc_config_irq(bt_bmc, pdev);
+
+	if (bt_bmc->irq) {
+		dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
+	} else {
+		dev_info(dev, "No IRQ; using timer\n");
+		setup_timer(&bt_bmc->poll_timer, poll_timer,
+			    (unsigned long)bt_bmc);
+		bt_bmc->poll_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&bt_bmc->poll_timer);
+	}
+
+	iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) |
+		  (BT_IRQ << BT_CR0_IRQ) |
+		  BT_CR0_EN_CLR_SLV_RDP |
+		  BT_CR0_EN_CLR_SLV_WRP |
+		  BT_CR0_ENABLE_IBT,
+		  bt_bmc->base + BT_CR0);
+
+	clr_b_busy(bt_bmc);
+
+	return 0;
+}
+
+static int bt_bmc_remove(struct platform_device *pdev)
+{
+	struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&bt_bmc->miscdev);
+	if (!bt_bmc->irq)
+		del_timer_sync(&bt_bmc->poll_timer);
+	return 0;
+}
+
+static const struct of_device_id bt_bmc_match[] = {
+	{ .compatible = "aspeed,ast2400-bt-bmc" },
+	{ },
+};
+
+static struct platform_driver bt_bmc_driver = {
+	.driver = {
+		.name		= DEVICE_NAME,
+		.of_match_table = bt_bmc_match,
+	},
+	.probe = bt_bmc_probe,
+	.remove = bt_bmc_remove,
+};
+
+module_platform_driver(bt_bmc_driver);
+
+MODULE_DEVICE_TABLE(of, bt_bmc_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alistair Popple <alistair@popple.id.au>");
+MODULE_DESCRIPTION("Linux device interface to the BT interface");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..17b12942c67d 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -74,6 +74,7 @@ header-y += bpf_common.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
+header-y += bt-bmc.h
 header-y += btrfs.h
 header-y += can.h
 header-y += capability.h
diff --git a/include/uapi/linux/bt-bmc.h b/include/uapi/linux/bt-bmc.h
new file mode 100644
index 000000000000..d9ec766a63d0
--- /dev/null
+++ b/include/uapi/linux/bt-bmc.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2015-2016, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_BT_BMC_H
+#define _UAPI_LINUX_BT_BMC_H
+
+#include <linux/ioctl.h>
+
+#define __BT_BMC_IOCTL_MAGIC	0xb1
+#define BT_BMC_IOCTL_SMS_ATN	_IO(__BT_BMC_IOCTL_MAGIC, 0x00)
+
+#endif /* _UAPI_LINUX_BT_BMC_H */
-- 
cgit v1.2.3


From bd11f0741fa5a2c296629898ad07759dd12b35bb Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Tue, 27 Sep 2016 23:57:58 -0700
Subject: ipv6 addrconf: implement RFC7559 router solicitation backoff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This implements:
  https://tools.ietf.org/html/rfc7559

Backoff is performed according to RFC3315 section 14:
  https://tools.ietf.org/html/rfc3315#section-14

We allow setting /proc/sys/net/ipv6/conf/*/router_solicitations
to a negative value meaning an unlimited number of retransmits,
and we make this the new default (inline with the RFC).

We also add a new setting:
  /proc/sys/net/ipv6/conf/*/router_solicitation_max_interval
defaulting to 1 hour (per RFC recommendation).

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Acked-by: Erik Kline <ek@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      |  1 +
 include/net/addrconf.h    |  3 ++-
 include/net/if_inet6.h    |  1 +
 include/uapi/linux/ipv6.h |  1 +
 net/ipv6/addrconf.c       | 51 ++++++++++++++++++++++++++++++++++++++++-------
 5 files changed, 49 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c6dbcd84a2c7..7e9a789be5e0 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -18,6 +18,7 @@ struct ipv6_devconf {
 	__s32		dad_transmits;
 	__s32		rtr_solicits;
 	__s32		rtr_solicit_interval;
+	__s32		rtr_solicit_max_interval;
 	__s32		rtr_solicit_delay;
 	__s32		force_mld_version;
 	__s32		mldv1_unsolicited_report_interval;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9826d3a9464c..f2d072787947 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -1,8 +1,9 @@
 #ifndef _ADDRCONF_H
 #define _ADDRCONF_H
 
-#define MAX_RTR_SOLICITATIONS		3
+#define MAX_RTR_SOLICITATIONS		-1		/* unlimited */
 #define RTR_SOLICITATION_INTERVAL	(4*HZ)
+#define RTR_SOLICITATION_MAX_INTERVAL	(3600*HZ)	/* 1 hour */
 
 #define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 1c8b6820b694..515352c6280a 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -201,6 +201,7 @@ struct inet6_dev {
 	struct ipv6_devstat	stats;
 
 	struct timer_list	rs_timer;
+	__s32			rs_interval;	/* in jiffies */
 	__u8			rs_probes;
 
 	__u8			addr_gen_mode;
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 395876060f50..8c2772340c3f 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -177,6 +177,7 @@ enum {
 	DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	DEVCONF_DROP_UNSOLICITED_NA,
 	DEVCONF_KEEP_ADDR_ON_DOWN,
+	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 35d4baa55c9d..87183983724d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp)
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
 }
 
+static inline s32 rfc3315_s14_backoff_init(s32 irt)
+{
+	/* multiply 'initial retransmission time' by 0.9 .. 1.1 */
+	u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt;
+	do_div(tmp, 1000000);
+	return (s32)tmp;
+}
+
+static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
+{
+	/* multiply 'retransmission timeout' by 1.9 .. 2.1 */
+	u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt;
+	do_div(tmp, 1000000);
+	if ((s32)tmp > mrt) {
+		/* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
+		tmp = (900000 + prandom_u32() % 200001) * (u64)mrt;
+		do_div(tmp, 1000000);
+	}
+	return (s32)tmp;
+}
+
 #ifdef CONFIG_SYSCTL
 static int addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
@@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
@@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data)
 	if (idev->if_flags & IF_RA_RCVD)
 		goto out;
 
-	if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+	if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
 		write_unlock(&idev->lock);
 		if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
 			ndisc_send_rs(dev, &lladdr,
@@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data)
 			goto put;
 
 		write_lock(&idev->lock);
+		idev->rs_interval = rfc3315_s14_backoff_update(
+			idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
 		/* The wait after the last probe can be shorter */
 		addrconf_mod_rs_timer(idev, (idev->rs_probes ==
 					     idev->cnf.rtr_solicits) ?
 				      idev->cnf.rtr_solicit_delay :
-				      idev->cnf.rtr_solicit_interval);
+				      idev->rs_interval);
 	} else {
 		/*
 		 * Note: we do not support deprecated "all on-link"
@@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
 	send_rs = send_mld &&
 		  ipv6_accept_ra(ifp->idev) &&
-		  ifp->idev->cnf.rtr_solicits > 0 &&
+		  ifp->idev->cnf.rtr_solicits != 0 &&
 		  (dev->flags&IFF_LOOPBACK) == 0;
 	read_unlock_bh(&ifp->idev->lock);
 
@@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 
 		write_lock_bh(&ifp->idev->lock);
 		spin_lock(&ifp->lock);
+		ifp->idev->rs_interval = rfc3315_s14_backoff_init(
+			ifp->idev->cnf.rtr_solicit_interval);
 		ifp->idev->rs_probes = 1;
 		ifp->idev->if_flags |= IF_RS_SENT;
-		addrconf_mod_rs_timer(ifp->idev,
-				      ifp->idev->cnf.rtr_solicit_interval);
+		addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
 		spin_unlock(&ifp->lock);
 		write_unlock_bh(&ifp->idev->lock);
 	}
@@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
 	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
 		jiffies_to_msecs(cnf->rtr_solicit_interval);
+	array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_solicit_max_interval);
 	array[DEVCONF_RTR_SOLICIT_DELAY] =
 		jiffies_to_msecs(cnf->rtr_solicit_delay);
 	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
@@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
 		return -EINVAL;
 	if (!ipv6_accept_ra(idev))
 		return -EINVAL;
-	if (idev->cnf.rtr_solicits <= 0)
+	if (idev->cnf.rtr_solicits == 0)
 		return -EINVAL;
 
 	write_lock_bh(&idev->lock);
@@ -5128,8 +5156,10 @@ update_lft:
 
 	if (update_rs) {
 		idev->if_flags |= IF_RS_SENT;
+		idev->rs_interval = rfc3315_s14_backoff_init(
+			idev->cnf.rtr_solicit_interval);
 		idev->rs_probes = 1;
-		addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+		addrconf_mod_rs_timer(idev, idev->rs_interval);
 	}
 
 	/* Well, that's kinda nasty ... */
@@ -5777,6 +5807,13 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+	{
+		.procname	= "router_solicitation_max_interval",
+		.data		= &ipv6_devconf.rtr_solicit_max_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
 	{
 		.procname	= "router_solicitation_delay",
 		.data		= &ipv6_devconf.rtr_solicit_delay,
-- 
cgit v1.2.3


From cb3b7d87652aeb37cfb5295a6157a3280dae10cb Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Tue, 20 Sep 2016 17:31:13 +0300
Subject: cfg80211: add start / stop NAN commands

This allows user space to start/stop NAN interface.
A NAN interface is like P2P device in a few aspects: it
doesn't have a netdev associated to it.
Add the new interface type and prevent operations that
can't be executed on NAN interface like scan.

Define several attributes that may be configured by user space
when starting NAN functionality (master preference and dual
band operation)

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 21 +++++++++-
 include/uapi/linux/nl80211.h | 47 +++++++++++++++++++++++
 net/mac80211/cfg.c           |  2 +
 net/mac80211/chan.c          |  3 ++
 net/mac80211/iface.c         |  4 ++
 net/mac80211/offchannel.c    |  1 +
 net/mac80211/rx.c            |  3 ++
 net/mac80211/util.c          |  1 +
 net/wireless/chan.c          |  2 +
 net/wireless/core.c          | 34 +++++++++++++++++
 net/wireless/core.h          |  3 ++
 net/wireless/mlme.c          |  1 +
 net/wireless/nl80211.c       | 91 ++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/rdev-ops.h      | 20 ++++++++++
 net/wireless/trace.h         | 27 +++++++++++++
 net/wireless/util.c          |  6 ++-
 16 files changed, 260 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 68dca3d93b85..9898e1f883e2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2313,6 +2313,19 @@ struct cfg80211_qos_map {
 	struct cfg80211_dscp_range up[8];
 };
 
+/**
+ * struct cfg80211_nan_conf - NAN configuration
+ *
+ * This struct defines NAN configuration parameters
+ *
+ * @master_pref: master preference (1 - 255)
+ * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf
+ */
+struct cfg80211_nan_conf {
+	u8 master_pref;
+	u8 dual;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2601,6 +2614,8 @@ struct cfg80211_qos_map {
  *	and returning to the base channel for communication with the AP.
  * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both
  *	peers must be on the base channel when the call completes.
+ * @start_nan: Start the NAN interface.
+ * @stop_nan: Stop the NAN interface.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2866,6 +2881,9 @@ struct cfg80211_ops {
 	void	(*tdls_cancel_channel_switch)(struct wiphy *wiphy,
 					      struct net_device *dev,
 					      const u8 *addr);
+	int	(*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev,
+			     struct cfg80211_nan_conf *conf);
+	void	(*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev);
 };
 
 /*
@@ -3626,6 +3644,7 @@ struct cfg80211_cached_keys;
  *	beacons, 0 when not valid
  * @address: The address for this device, valid only if @netdev is %NULL
  * @p2p_started: true if this is a P2P Device that has been started
+ * @nan_started: true if this is a NAN interface that has been started
  * @cac_started: true if DFS channel availability check has been started
  * @cac_start_time: timestamp (jiffies) when the dfs state was entered.
  * @cac_time_ms: CAC time in ms
@@ -3657,7 +3676,7 @@ struct wireless_dev {
 
 	struct mutex mtx;
 
-	bool use_4addr, p2p_started;
+	bool use_4addr, p2p_started, nan_started;
 
 	u8 address[ETH_ALEN] __aligned(sizeof(u16));
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ec10d1b2838f..98fd3ec8598d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -838,6 +838,16 @@
  *	not running. The driver indicates the status of the scan through
  *	cfg80211_scan_done().
  *
+ * @NL80211_CMD_START_NAN: Start NAN operation, identified by its
+ *	%NL80211_ATTR_WDEV interface. This interface must have been previously
+ *	created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the
+ *	NAN interface will create or join a cluster. This command must have a
+ *	valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional
+ *	%NL80211_ATTR_NAN_DUAL attributes.
+ *	After this command NAN functions can be added.
+ * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by
+ *	its %NL80211_ATTR_WDEV interface.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1026,6 +1036,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_ABORT_SCAN,
 
+	NL80211_CMD_START_NAN,
+	NL80211_CMD_STOP_NAN,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1739,6 +1752,12 @@ enum nl80211_commands {
  *	regulatory indoor configuration would be owned by the netlink socket
  *	that configured the indoor setting, and the indoor operation would be
  *	cleared when the socket is closed.
+ *	If set during NAN interface creation, the interface will be destroyed
+ *	if the socket is closed just like any other interface. Moreover, only
+ *	the netlink socket that created the interface will be allowed to add
+ *	and remove functions. NAN notifications will be sent in unicast to that
+ *	socket. Without this attribute, any socket can add functions and the
+ *	notifications will be sent to the %NL80211_MCGRP_NAN multicast group.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
@@ -1873,6 +1892,14 @@ enum nl80211_commands {
  * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is
  *	used to pull the stored data for mesh peer in power save state.
  *
+ * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by
+ *	%NL80211_CMD_START_NAN. Its type is u8 and it can't be 0.
+ *	Also, values 1 and 255 are reserved for certification purposes and
+ *	should not be used during a normal device operation.
+ * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see
+ *	&enum nl80211_nan_dual_band_conf). This attribute is used with
+ *	%NL80211_CMD_START_NAN.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2267,6 +2294,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MESH_PEER_AID,
 
+	NL80211_ATTR_NAN_MASTER_PREF,
+	NL80211_ATTR_NAN_DUAL,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2345,6 +2375,7 @@ enum nl80211_attrs {
  *	commands to create and destroy one
  * @NL80211_IF_TYPE_OCB: Outside Context of a BSS
  *	This mode corresponds to the MIB variable dot11OCBActivated=true
+ * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev)
  * @NL80211_IFTYPE_MAX: highest interface type number currently defined
  * @NUM_NL80211_IFTYPES: number of defined interface types
  *
@@ -2365,6 +2396,7 @@ enum nl80211_iftype {
 	NL80211_IFTYPE_P2P_GO,
 	NL80211_IFTYPE_P2P_DEVICE,
 	NL80211_IFTYPE_OCB,
+	NL80211_IFTYPE_NAN,
 
 	/* keep last */
 	NUM_NL80211_IFTYPES,
@@ -4870,4 +4902,19 @@ enum nl80211_bss_select_attr {
 	NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_nan_dual_band_conf - NAN dual band configuration
+ *
+ * Defines the NAN dual band mode of operation
+ *
+ * @NL80211_NAN_BAND_DEFAULT: device default mode
+ * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode
+ * @NL80211_NAN_BAND_5GHZ: 5GHz mode
+  */
+enum nl80211_nan_dual_band_conf {
+	NL80211_NAN_BAND_DEFAULT	= 1 << 0,
+	NL80211_NAN_BAND_2GHZ		= 1 << 1,
+	NL80211_NAN_BAND_5GHZ		= 1 << 2,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e29ff5749944..a74027f887bc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -257,6 +257,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -2036,6 +2037,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
 		     !(req->flags & NL80211_SCAN_FLAG_AP)))
 			return -EOPNOTSUPP;
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 74142d07ad31..d035801569eb 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
 				    ieee80211_get_max_required_bw(sdata));
 			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			continue;
 		case NL80211_IFTYPE_ADHOC:
 		case NL80211_IFTYPE_WDS:
@@ -718,6 +719,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
 
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			continue;
 		case NL80211_IFTYPE_STATION:
 			if (!sdata->u.mgd.associated)
@@ -980,6 +982,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata)
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
 		break;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b0abddc714ef..e694ca2baad0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -545,6 +545,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	case NL80211_IFTYPE_ADHOC:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_OCB:
+	case NL80211_IFTYPE_NAN:
 		/* no special treatment */
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
@@ -660,6 +661,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 			break;
 		case NL80211_IFTYPE_WDS:
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			break;
 		default:
 			/* not reached */
@@ -948,6 +950,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		/* relies on synchronize_rcu() below */
 		RCU_INIT_POINTER(local->p2p_sdata, NULL);
 		/* fall through */
+	case NL80211_IFTYPE_NAN:
 	default:
 		cancel_work_sync(&sdata->work);
 		/*
@@ -1457,6 +1460,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		sdata->vif.bss_conf.bssid = sdata->vif.addr;
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 55a9c5b94ce1..75d5c960ce67 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -838,6 +838,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	case NL80211_IFTYPE_P2P_DEVICE:
 		need_offchan = true;
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e796060b7c5e..c9489a86e6d6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3586,6 +3586,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 		       ieee80211_is_probe_req(hdr->frame_control) ||
 		       ieee80211_is_probe_resp(hdr->frame_control) ||
 		       ieee80211_is_beacon(hdr->frame_control);
+	case NL80211_IFTYPE_NAN:
+		/* Currently no frames on NAN interface are allowed */
+		return false;
 	default:
 		break;
 	}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b6865d884487..2c78541f695c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1975,6 +1975,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		case NL80211_IFTYPE_AP_VLAN:
 		case NL80211_IFTYPE_MONITOR:
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			/* nothing to do */
 			break;
 		case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0f506220a3bd..5497d022fada 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
@@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		/* these interface types don't really have a channel */
 		return;
 	case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4911cd997b9a..013987243c0b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	}
 }
 
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+		       struct wireless_dev *wdev)
+{
+	ASSERT_RTNL();
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
+		return;
+
+	if (!wdev->nan_started)
+		return;
+
+	rdev_stop_nan(rdev, wdev);
+	wdev->nan_started = false;
+
+	rdev->opencount--;
+}
+
 void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
 		case NL80211_IFTYPE_P2P_DEVICE:
 			cfg80211_stop_p2p_device(rdev, wdev);
 			break;
+		case NL80211_IFTYPE_NAN:
+			cfg80211_stop_nan(rdev, wdev);
+			break;
 		default:
 			break;
 		}
@@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
 				    c->limits[j].max > 1))
 				return -EINVAL;
 
+			/* Only a single NAN can be allowed */
+			if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
+				    c->limits[j].max > 1))
+				return -EINVAL;
+
 			cnt += c->limits[j].max;
 			/*
 			 * Don't advertise an unsupported type
@@ -579,6 +604,10 @@ int wiphy_register(struct wiphy *wiphy)
 		     !rdev->ops->tdls_cancel_channel_switch)))
 		return -EINVAL;
 
+	if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) &&
+		    (!rdev->ops->start_nan || !rdev->ops->stop_nan)))
+		return -EINVAL;
+
 	/*
 	 * if a wiphy has unsupported modes for regulatory channel enforcement,
 	 * opt-out of enforcement checking
@@ -589,6 +618,7 @@ int wiphy_register(struct wiphy *wiphy)
 				       BIT(NL80211_IFTYPE_P2P_GO) |
 				       BIT(NL80211_IFTYPE_ADHOC) |
 				       BIT(NL80211_IFTYPE_P2P_DEVICE) |
+				       BIT(NL80211_IFTYPE_NAN) |
 				       BIT(NL80211_IFTYPE_AP_VLAN) |
 				       BIT(NL80211_IFTYPE_MONITOR)))
 		wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF;
@@ -916,6 +946,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
 		cfg80211_mlme_purge_registrations(wdev);
 		cfg80211_stop_p2p_device(rdev, wdev);
 		break;
+	case NL80211_IFTYPE_NAN:
+		cfg80211_stop_nan(rdev, wdev);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		break;
@@ -979,6 +1012,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
 		/* must be handled by mac80211/driver, has no APIs */
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		/* cannot happen, has no netdev */
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 554f87d0f991..08d2e948c9ad 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
 void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev);
 
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+		       struct wireless_dev *wdev);
+
 #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
 
 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d6abb0704db5..cbb48e26a871 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			 * fall through, P2P device only supports
 			 * public action frames
 			 */
+		case NL80211_IFTYPE_NAN:
 		default:
 			err = -EOPNOTSUPP;
 			break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b8441e60b0f6..9e9fb37087fc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -409,6 +409,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 		.len = VHT_MUMIMO_GROUPS_DATA_LEN
 	},
 	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
+	[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
+	[NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
 };
 
 /* policy for the key attributes */
@@ -934,6 +936,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NL80211_IFTYPE_OCB:
 	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_WDS:
 	case NUM_NL80211_IFTYPES:
@@ -2819,7 +2822,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	    !(rdev->wiphy.interface_modes & (1 << type)))
 		return -EOPNOTSUPP;
 
-	if ((type == NL80211_IFTYPE_P2P_DEVICE ||
+	if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN ||
 	     rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) &&
 	    info->attrs[NL80211_ATTR_MAC]) {
 		nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC],
@@ -2875,9 +2878,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		       wdev->mesh_id_up_len);
 		wdev_unlock(wdev);
 		break;
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/*
-		 * P2P Device doesn't have a netdev, so doesn't go
+		 * P2P Device and NAN do not have a netdev, so don't go
 		 * through the netdev notifier and must be added here
 		 */
 		mutex_init(&wdev->mtx);
@@ -6434,6 +6438,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 
 	wiphy = &rdev->wiphy;
 
+	if (wdev->iftype == NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->scan)
 		return -EOPNOTSUPP;
 
@@ -8977,6 +8984,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -9022,6 +9030,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_P2P_GO:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -9138,6 +9147,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -10504,6 +10514,58 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct cfg80211_nan_conf conf = {};
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (wdev->nan_started)
+		return -EEXIST;
+
+	if (rfkill_blocked(rdev->rfkill))
+		return -ERFKILL;
+
+	if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_NAN_DUAL])
+		return -EINVAL;
+
+	conf.master_pref =
+		nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+	if (!conf.master_pref)
+		return -EINVAL;
+
+	conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+
+	err = rdev_start_nan(rdev, wdev, &conf);
+	if (err)
+		return err;
+
+	wdev->nan_started = true;
+	rdev->opencount++;
+
+	return 0;
+}
+
+static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	cfg80211_stop_nan(rdev, wdev);
+
+	return 0;
+}
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
@@ -11205,7 +11267,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 
 			dev_hold(dev);
 		} else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
-			if (!wdev->p2p_started) {
+			if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
+			    !wdev->p2p_started) {
+				if (rtnl)
+					rtnl_unlock();
+				return -ENETDOWN;
+			}
+			if (wdev->iftype == NL80211_IFTYPE_NAN &&
+			    !wdev->nan_started) {
 				if (rtnl)
 					rtnl_unlock();
 				return -ENETDOWN;
@@ -11838,6 +11907,22 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_START_NAN,
+		.doit = nl80211_start_nan,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_STOP_NAN,
+		.doit = nl80211_stop_nan,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 	{
 		.cmd = NL80211_CMD_SET_MCAST_RATE,
 		.doit = nl80211_set_mcast_rate,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 85ff30bee2b9..afb68a8428b9 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -887,6 +887,26 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
+static inline int rdev_start_nan(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev,
+				 struct cfg80211_nan_conf *conf)
+{
+	int ret;
+
+	trace_rdev_start_nan(&rdev->wiphy, wdev, conf);
+	ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev)
+{
+	trace_rdev_stop_nan(&rdev->wiphy, wdev);
+	rdev->ops->stop_nan(&rdev->wiphy, wdev);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
 				   struct cfg80211_acl_data *params)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 72b5255cefe2..5f3370f4c6a2 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1889,6 +1889,33 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device,
 	TP_ARGS(wiphy, wdev)
 );
 
+TRACE_EVENT(rdev_start_nan,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 struct cfg80211_nan_conf *conf),
+	TP_ARGS(wiphy, wdev, conf),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual);
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+		  ", master preference: %u, dual: %d",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+		  __entry->dual)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev)
+);
+
 TRACE_EVENT(rdev_set_mac_acl,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 struct cfg80211_acl_data *params),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e02141d66b69..7a2d46b0058a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1008,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 	if (otype == NL80211_IFTYPE_AP_VLAN)
 		return -EOPNOTSUPP;
 
-	/* cannot change into P2P device type */
-	if (ntype == NL80211_IFTYPE_P2P_DEVICE)
+	/* cannot change into P2P device or NAN */
+	if (ntype == NL80211_IFTYPE_P2P_DEVICE ||
+	    ntype == NL80211_IFTYPE_NAN)
 		return -EOPNOTSUPP;
 
 	if (!rdev->ops->change_virtual_intf ||
@@ -1088,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			/* not happening */
 			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			WARN_ON(1);
 			break;
 		}
-- 
cgit v1.2.3


From a442b761b24b6886f9a4e2ff5f8cb4824c96526b Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Tue, 20 Sep 2016 17:31:15 +0300
Subject: cfg80211: add add_nan_func / del_nan_func

A NAN function can be either publish, subscribe or follow
up. Make all the necessary verifications and just pass the
request to the driver.
Allow the user space application that starts NAN to
forbid any other socket to add or remove functions.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Ayala Beker <ayala.beker@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  91 +++++++++++
 include/uapi/linux/nl80211.h | 150 ++++++++++++++++++
 net/wireless/core.c          |   3 +-
 net/wireless/nl80211.c       | 369 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  21 +++
 net/wireless/trace.h         |  39 +++++
 net/wireless/util.c          |  22 +++
 7 files changed, 694 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9898e1f883e2..2f35ccf6da83 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2326,6 +2326,73 @@ struct cfg80211_nan_conf {
 	u8 dual;
 };
 
+/**
+ * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter
+ *
+ * @filter: the content of the filter
+ * @len: the length of the filter
+ */
+struct cfg80211_nan_func_filter {
+	const u8 *filter;
+	u8 len;
+};
+
+/**
+ * struct cfg80211_nan_func - a NAN function
+ *
+ * @type: &enum nl80211_nan_function_type
+ * @service_id: the service ID of the function
+ * @publish_type: &nl80211_nan_publish_type
+ * @close_range: if true, the range should be limited. Threshold is
+ *	implementation specific.
+ * @publish_bcast: if true, the solicited publish should be broadcasted
+ * @subscribe_active: if true, the subscribe is active
+ * @followup_id: the instance ID for follow up
+ * @followup_reqid: the requestor instance ID for follow up
+ * @followup_dest: MAC address of the recipient of the follow up
+ * @ttl: time to live counter in DW.
+ * @serv_spec_info: Service Specific Info
+ * @serv_spec_info_len: Service Specific Info length
+ * @srf_include: if true, SRF is inclusive
+ * @srf_bf: Bloom Filter
+ * @srf_bf_len: Bloom Filter length
+ * @srf_bf_idx: Bloom Filter index
+ * @srf_macs: SRF MAC addresses
+ * @srf_num_macs: number of MAC addresses in SRF
+ * @rx_filters: rx filters that are matched with corresponding peer's tx_filter
+ * @tx_filters: filters that should be transmitted in the SDF.
+ * @num_rx_filters: length of &rx_filters.
+ * @num_tx_filters: length of &tx_filters.
+ * @instance_id: driver allocated id of the function.
+ * @cookie: unique NAN function identifier.
+ */
+struct cfg80211_nan_func {
+	enum nl80211_nan_function_type type;
+	u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN];
+	u8 publish_type;
+	bool close_range;
+	bool publish_bcast;
+	bool subscribe_active;
+	u8 followup_id;
+	u8 followup_reqid;
+	struct mac_address followup_dest;
+	u32 ttl;
+	const u8 *serv_spec_info;
+	u8 serv_spec_info_len;
+	bool srf_include;
+	const u8 *srf_bf;
+	u8 srf_bf_len;
+	u8 srf_bf_idx;
+	struct mac_address *srf_macs;
+	int srf_num_macs;
+	struct cfg80211_nan_func_filter *rx_filters;
+	struct cfg80211_nan_func_filter *tx_filters;
+	u8 num_tx_filters;
+	u8 num_rx_filters;
+	u8 instance_id;
+	u64 cookie;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2616,6 +2683,14 @@ struct cfg80211_nan_conf {
  *	peers must be on the base channel when the call completes.
  * @start_nan: Start the NAN interface.
  * @stop_nan: Stop the NAN interface.
+ * @add_nan_func: Add a NAN function. Returns negative value on failure.
+ *	On success @nan_func ownership is transferred to the driver and
+ *	it may access it outside of the scope of this function. The driver
+ *	should free the @nan_func when no longer needed by calling
+ *	cfg80211_free_nan_func().
+ *	On success the driver should assign an instance_id in the
+ *	provided @nan_func.
+ * @del_nan_func: Delete a NAN function.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2884,6 +2959,10 @@ struct cfg80211_ops {
 	int	(*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev,
 			     struct cfg80211_nan_conf *conf);
 	void	(*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev);
+	int	(*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev,
+				struct cfg80211_nan_func *nan_func);
+	void	(*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev,
+			       u64 cookie);
 };
 
 /*
@@ -3335,6 +3414,8 @@ struct wiphy_iftype_ext_capab {
  * @bss_select_support: bitmask indicating the BSS selection criteria supported
  *	by the driver in the .connect() callback. The bit position maps to the
  *	attribute indices defined in &enum nl80211_bss_select_attr.
+ *
+ * @cookie_counter: unique generic cookie counter, used to identify objects.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -3464,6 +3545,8 @@ struct wiphy {
 
 	u32 bss_select_support;
 
+	u64 cookie_counter;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -5584,6 +5667,14 @@ wiphy_ext_feature_isset(struct wiphy *wiphy,
 	return (ft_byte & BIT(ftidx % 8)) != 0;
 }
 
+/**
+ * cfg80211_free_nan_func - free NAN function
+ * @f: NAN function that should be freed
+ *
+ * Frees all the NAN function and all it's allocated members.
+ */
+void cfg80211_free_nan_func(struct cfg80211_nan_func *f);
+
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 98fd3ec8598d..e4935d963061 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -847,6 +847,21 @@
  *	After this command NAN functions can be added.
  * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by
  *	its %NL80211_ATTR_WDEV interface.
+ * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined
+ *	with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this
+ *	operation returns the strictly positive and unique instance id
+ *	(%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE)
+ *	of the function upon success.
+ *	Since instance ID's can be re-used, this cookie is the right
+ *	way to identify the function. This will avoid races when a termination
+ *	event is handled by the user space after it has already added a new
+ *	function that got the same instance id from the kernel as the one
+ *	which just terminated.
+ *	This cookie may be used in NAN events even before the command
+ *	returns, so userspace shouldn't process NAN events until it processes
+ *	the response to this command.
+ *	Look at %NL80211_ATTR_SOCKET_OWNER as well.
+ * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -1038,6 +1053,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_START_NAN,
 	NL80211_CMD_STOP_NAN,
+	NL80211_CMD_ADD_NAN_FUNCTION,
+	NL80211_CMD_DEL_NAN_FUNCTION,
 
 	/* add new commands above here */
 
@@ -1899,6 +1916,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see
  *	&enum nl80211_nan_dual_band_conf). This attribute is used with
  *	%NL80211_CMD_START_NAN.
+ * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
+ *	&enum nl80211_nan_func_attributes for description of this nested
+ *	attribute.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2296,6 +2316,7 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_NAN_MASTER_PREF,
 	NL80211_ATTR_NAN_DUAL,
+	NL80211_ATTR_NAN_FUNC,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -4917,4 +4938,133 @@ enum nl80211_nan_dual_band_conf {
 	NL80211_NAN_BAND_5GHZ		= 1 << 2,
 };
 
+/**
+ * enum nl80211_nan_function_type - NAN function type
+ *
+ * Defines the function type of a NAN function
+ *
+ * @NL80211_NAN_FUNC_PUBLISH: function is publish
+ * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe
+ * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up
+ */
+enum nl80211_nan_function_type {
+	NL80211_NAN_FUNC_PUBLISH,
+	NL80211_NAN_FUNC_SUBSCRIBE,
+	NL80211_NAN_FUNC_FOLLOW_UP,
+
+	/* keep last */
+	__NL80211_NAN_FUNC_TYPE_AFTER_LAST,
+	NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1,
+};
+
+/**
+ * enum nl80211_nan_publish_type - NAN publish tx type
+ *
+ * Defines how to send publish Service Discovery Frames
+ *
+ * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited
+ * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited
+ */
+enum nl80211_nan_publish_type {
+	NL80211_NAN_SOLICITED_PUBLISH = 1 << 0,
+	NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1,
+};
+
+#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6
+#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff
+#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff
+
+/**
+ * enum nl80211_nan_func_attributes - NAN function attributes
+ * @__NL80211_NAN_FUNC_INVALID: invalid
+ * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8).
+ * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as
+ *	specified in NAN spec. This is a binary attribute.
+ * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is
+ *	publish. Defines the transmission type for the publish Service Discovery
+ *	Frame, see &enum nl80211_nan_publish_type. Its type is u8.
+ * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited
+ *	publish. Should the solicited publish Service Discovery Frame be sent to
+ *	the NAN Broadcast address. This is a flag.
+ * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is
+ *	subscribe. Is the subscribe active. This is a flag.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up.
+ *	The instance ID for the follow up Service Discovery Frame. This is u8.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type
+ *	is follow up. This is a u8.
+ *	The requestor instance ID for the follow up Service Discovery Frame.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the
+ *	follow up Service Discovery Frame. This is a binary attribute.
+ * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a
+ *	close range. The range itself (RSSI) is defined by the device.
+ *	This is a flag.
+ * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should
+ *	stay active. If not present infinite TTL is assumed. This is a u32.
+ * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service
+ *	specific info. This is a binary attribute.
+ * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute.
+ *	See &enum nl80211_nan_srf_attributes.
+ * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested
+ *	attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a
+ *	nested attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function.
+ *	Its type is u8 and it cannot be 0.
+ * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason.
+ *	See &enum nl80211_nan_func_term_reason.
+ *
+ * @NUM_NL80211_NAN_FUNC_ATTR: internal
+ * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute
+ */
+enum nl80211_nan_func_attributes {
+	__NL80211_NAN_FUNC_INVALID,
+	NL80211_NAN_FUNC_TYPE,
+	NL80211_NAN_FUNC_SERVICE_ID,
+	NL80211_NAN_FUNC_PUBLISH_TYPE,
+	NL80211_NAN_FUNC_PUBLISH_BCAST,
+	NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE,
+	NL80211_NAN_FUNC_FOLLOW_UP_ID,
+	NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID,
+	NL80211_NAN_FUNC_FOLLOW_UP_DEST,
+	NL80211_NAN_FUNC_CLOSE_RANGE,
+	NL80211_NAN_FUNC_TTL,
+	NL80211_NAN_FUNC_SERVICE_INFO,
+	NL80211_NAN_FUNC_SRF,
+	NL80211_NAN_FUNC_RX_MATCH_FILTER,
+	NL80211_NAN_FUNC_TX_MATCH_FILTER,
+	NL80211_NAN_FUNC_INSTANCE_ID,
+	NL80211_NAN_FUNC_TERM_REASON,
+
+	/* keep last */
+	NUM_NL80211_NAN_FUNC_ATTR,
+	NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1
+};
+
+/**
+ * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes
+ * @__NL80211_NAN_SRF_INVALID: invalid
+ * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set.
+ *	This is a flag.
+ * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if
+ *	&NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary.
+ * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if
+ *	&NL80211_NAN_SRF_BF is present. This is a u8.
+ * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if
+ *	and only if &NL80211_NAN_SRF_BF isn't present. This is a nested
+ *	attribute. Each nested attribute is a MAC address.
+ * @NUM_NL80211_NAN_SRF_ATTR: internal
+ * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute
+ */
+enum nl80211_nan_srf_attributes {
+	__NL80211_NAN_SRF_INVALID,
+	NL80211_NAN_SRF_INCLUDE,
+	NL80211_NAN_SRF_BF,
+	NL80211_NAN_SRF_BF_IDX,
+	NL80211_NAN_SRF_MAC_ADDRS,
+
+	/* keep last */
+	NUM_NL80211_NAN_SRF_ATTR,
+	NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 013987243c0b..8201e6d7449e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -605,7 +605,8 @@ int wiphy_register(struct wiphy *wiphy)
 		return -EINVAL;
 
 	if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) &&
-		    (!rdev->ops->start_nan || !rdev->ops->stop_nan)))
+		    (!rdev->ops->start_nan || !rdev->ops->stop_nan ||
+		     !rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
 		return -EINVAL;
 
 	/*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9e9fb37087fc..0eca59ccd685 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -411,6 +411,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
 	[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
 	[NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
+	[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
 };
 
 /* policy for the key attributes */
@@ -504,6 +505,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
 	},
 };
 
+/* policy for NAN function attributes */
+static const struct nla_policy
+nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
+	[NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY,
+				    .len = NL80211_NAN_FUNC_SERVICE_ID_LEN },
+	[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN },
+	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
+	[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
+			.len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN },
+	[NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 },
+};
+
+/* policy for Service Response Filter attributes */
+static const struct nla_policy
+nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
+	[NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG },
+	[NL80211_NAN_SRF_BF] = { .type = NLA_BINARY,
+				 .len =  NL80211_NAN_FUNC_SRF_MAX_LEN },
+	[NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 },
+	[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
+};
+
 static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 				     struct netlink_callback *cb,
 				     struct cfg80211_registered_device **rdev,
@@ -10566,6 +10600,325 @@ static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int validate_nan_filter(struct nlattr *filter_attr)
+{
+	struct nlattr *attr;
+	int len = 0, n_entries = 0, rem;
+
+	nla_for_each_nested(attr, filter_attr, rem) {
+		len += nla_len(attr);
+		n_entries++;
+	}
+
+	if (len >= U8_MAX)
+		return -EINVAL;
+
+	return n_entries;
+}
+
+static int handle_nan_filter(struct nlattr *attr_filter,
+			     struct cfg80211_nan_func *func,
+			     bool tx)
+{
+	struct nlattr *attr;
+	int n_entries, rem, i;
+	struct cfg80211_nan_func_filter *filter;
+
+	n_entries = validate_nan_filter(attr_filter);
+	if (n_entries < 0)
+		return n_entries;
+
+	BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters));
+
+	filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	i = 0;
+	nla_for_each_nested(attr, attr_filter, rem) {
+		filter[i].filter = kmemdup(nla_data(attr), nla_len(attr),
+					   GFP_KERNEL);
+		filter[i].len = nla_len(attr);
+		i++;
+	}
+	if (tx) {
+		func->num_tx_filters = n_entries;
+		func->tx_filters = filter;
+	} else {
+		func->num_rx_filters = n_entries;
+		func->rx_filters = filter;
+	}
+
+	return 0;
+}
+
+static int nl80211_nan_add_func(struct sk_buff *skb,
+				struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr;
+	struct cfg80211_nan_func *func;
+	struct sk_buff *msg = NULL;
+	void *hdr = NULL;
+	int err = 0;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (!info->attrs[NL80211_ATTR_NAN_FUNC])
+		return -EINVAL;
+
+	if (wdev->owner_nlportid &&
+	    wdev->owner_nlportid != info->snd_portid)
+		return -ENOTCONN;
+
+	err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX,
+			nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]),
+			nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]),
+			nl80211_nan_func_policy);
+	if (err)
+		return err;
+
+	func = kzalloc(sizeof(*func), GFP_KERNEL);
+	if (!func)
+		return -ENOMEM;
+
+	func->cookie = wdev->wiphy->cookie_counter++;
+
+	if (!tb[NL80211_NAN_FUNC_TYPE] ||
+	    nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
+		err = -EINVAL;
+		goto out;
+	}
+
+
+	func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]);
+
+	if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]),
+	       sizeof(func->service_id));
+
+	func->close_range =
+		nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]);
+
+	if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) {
+		func->serv_spec_info_len =
+			nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]);
+		func->serv_spec_info =
+			kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]),
+				func->serv_spec_info_len,
+				GFP_KERNEL);
+		if (!func->serv_spec_info) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (tb[NL80211_NAN_FUNC_TTL])
+		func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]);
+
+	switch (func->type) {
+	case NL80211_NAN_FUNC_PUBLISH:
+		if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		func->publish_type =
+			nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]);
+		func->publish_bcast =
+			nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]);
+
+		if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) &&
+			func->publish_bcast) {
+			err = -EINVAL;
+			goto out;
+		}
+		break;
+	case NL80211_NAN_FUNC_SUBSCRIBE:
+		func->subscribe_active =
+			nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]);
+		break;
+	case NL80211_NAN_FUNC_FOLLOW_UP:
+		if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		func->followup_id =
+			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]);
+		func->followup_reqid =
+			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]);
+		memcpy(func->followup_dest.addr,
+		       nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]),
+		       sizeof(func->followup_dest.addr));
+		if (func->ttl) {
+			err = -EINVAL;
+			goto out;
+		}
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (tb[NL80211_NAN_FUNC_SRF]) {
+		struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
+
+		err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
+				nla_data(tb[NL80211_NAN_FUNC_SRF]),
+				nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL);
+		if (err)
+			goto out;
+
+		func->srf_include =
+			nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]);
+
+		if (srf_tb[NL80211_NAN_SRF_BF]) {
+			if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] ||
+			    !srf_tb[NL80211_NAN_SRF_BF_IDX]) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			func->srf_bf_len =
+				nla_len(srf_tb[NL80211_NAN_SRF_BF]);
+			func->srf_bf =
+				kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]),
+					func->srf_bf_len, GFP_KERNEL);
+			if (!func->srf_bf) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			func->srf_bf_idx =
+				nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]);
+		} else {
+			struct nlattr *attr, *mac_attr =
+				srf_tb[NL80211_NAN_SRF_MAC_ADDRS];
+			int n_entries, rem, i = 0;
+
+			if (!mac_attr) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			n_entries = validate_acl_mac_addrs(mac_attr);
+			if (n_entries <= 0) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			func->srf_num_macs = n_entries;
+			func->srf_macs =
+				kzalloc(sizeof(*func->srf_macs) * n_entries,
+					GFP_KERNEL);
+			if (!func->srf_macs) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			nla_for_each_nested(attr, mac_attr, rem)
+				memcpy(func->srf_macs[i++].addr, nla_data(attr),
+				       sizeof(*func->srf_macs));
+		}
+	}
+
+	if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) {
+		err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER],
+					func, true);
+		if (err)
+			goto out;
+	}
+
+	if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) {
+		err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER],
+					func, false);
+		if (err)
+			goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_ADD_NAN_FUNCTION);
+	/* This can't really happen - we just allocated 4KB */
+	if (WARN_ON(!hdr)) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = rdev_add_nan_func(rdev, wdev, func);
+out:
+	if (err < 0) {
+		cfg80211_free_nan_func(func);
+		nlmsg_free(msg);
+		return err;
+	}
+
+	/* propagate the instance id and cookie to userspace  */
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie,
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	if (!func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID,
+		       func->instance_id))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, func_attr);
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+static int nl80211_nan_del_func(struct sk_buff *skb,
+			       struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	u64 cookie;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (!info->attrs[NL80211_ATTR_COOKIE])
+		return -EINVAL;
+
+	if (wdev->owner_nlportid &&
+	    wdev->owner_nlportid != info->snd_portid)
+		return -ENOTCONN;
+
+	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+	rdev_del_nan_func(rdev, wdev, cookie);
+
+	return 0;
+}
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
@@ -11923,6 +12276,22 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
+		.doit = nl80211_nan_add_func,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
+		.doit = nl80211_nan_del_func,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 	{
 		.cmd = NL80211_CMD_SET_MCAST_RATE,
 		.doit = nl80211_set_mcast_rate,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index afb68a8428b9..98c4c3bdcb11 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -907,6 +907,27 @@ static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
+static inline int
+rdev_add_nan_func(struct cfg80211_registered_device *rdev,
+		  struct wireless_dev *wdev,
+		  struct cfg80211_nan_func *nan_func)
+{
+	int ret;
+
+	trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func);
+	ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev,
+				    struct wireless_dev *wdev, u64 cookie)
+{
+	trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie);
+	rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
 				   struct cfg80211_acl_data *params)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5f3370f4c6a2..56089843d619 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1916,6 +1916,45 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan,
 	TP_ARGS(wiphy, wdev)
 );
 
+TRACE_EVENT(rdev_add_nan_func,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 const struct cfg80211_nan_func *func),
+	TP_ARGS(wiphy, wdev, func),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, func_type)
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->func_type = func->type;
+		__entry->cookie = func->cookie
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type,
+		  __entry->cookie)
+);
+
+TRACE_EVENT(rdev_del_nan_func,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie)
+);
+
 TRACE_EVENT(rdev_set_mac_acl,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 struct cfg80211_acl_data *params),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7a2d46b0058a..8edce22d1b93 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1762,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
 }
 EXPORT_SYMBOL(cfg80211_get_station);
 
+void cfg80211_free_nan_func(struct cfg80211_nan_func *f)
+{
+	int i;
+
+	if (!f)
+		return;
+
+	kfree(f->serv_spec_info);
+	kfree(f->srf_bf);
+	kfree(f->srf_macs);
+	for (i = 0; i < f->num_rx_filters; i++)
+		kfree(f->rx_filters[i].filter);
+
+	for (i = 0; i < f->num_tx_filters; i++)
+		kfree(f->tx_filters[i].filter);
+
+	kfree(f->rx_filters);
+	kfree(f->tx_filters);
+	kfree(f);
+}
+EXPORT_SYMBOL(cfg80211_free_nan_func);
+
 /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
 /* Ethernet-II snap header (RFC1042 for most EtherTypes) */
 const unsigned char rfc1042_header[] __aligned(2) =
-- 
cgit v1.2.3


From a5a9dcf291e1e541243878eed2d73a74006fa1f1 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Tue, 20 Sep 2016 17:31:16 +0300
Subject: cfg80211: allow the user space to change current NAN configuration

Some NAN configuration paramaters may change during the operation of
the NAN device. For example, a user may want to update master preference
value when the device gets plugged/unplugged to the power.
Add API that allows to do so.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 19 +++++++++++++++++++
 include/uapi/linux/nl80211.h | 11 +++++++++--
 net/wireless/nl80211.c       | 42 ++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      | 17 +++++++++++++++++
 net/wireless/trace.h         | 24 ++++++++++++++++++++++++
 5 files changed, 111 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2f35ccf6da83..8574a57e19ba 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2326,6 +2326,18 @@ struct cfg80211_nan_conf {
 	u8 dual;
 };
 
+/**
+ * enum cfg80211_nan_conf_changes - indicates changed fields in NAN
+ * configuration
+ *
+ * @CFG80211_NAN_CONF_CHANGED_PREF: master preference
+ * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation
+ */
+enum cfg80211_nan_conf_changes {
+	CFG80211_NAN_CONF_CHANGED_PREF = BIT(0),
+	CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1),
+};
+
 /**
  * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter
  *
@@ -2691,6 +2703,9 @@ struct cfg80211_nan_func {
  *	On success the driver should assign an instance_id in the
  *	provided @nan_func.
  * @del_nan_func: Delete a NAN function.
+ * @nan_change_conf: changes NAN configuration. The changed parameters must
+ *	be specified in @changes (using &enum cfg80211_nan_conf_changes);
+ *	All other parameters must be ignored.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2963,6 +2978,10 @@ struct cfg80211_ops {
 				struct cfg80211_nan_func *nan_func);
 	void	(*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev,
 			       u64 cookie);
+	int	(*nan_change_conf)(struct wiphy *wiphy,
+				   struct wireless_dev *wdev,
+				   struct cfg80211_nan_conf *conf,
+				   u32 changes);
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e4935d963061..9c9c0c352873 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -862,6 +862,10 @@
  *	the response to this command.
  *	Look at %NL80211_ATTR_SOCKET_OWNER as well.
  * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
+ * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN
+ *	must be operational (%NL80211_CMD_START_NAN was executed).
+ *	It must contain at least one of the following attributes:
+ *	%NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -1055,6 +1059,7 @@ enum nl80211_commands {
 	NL80211_CMD_STOP_NAN,
 	NL80211_CMD_ADD_NAN_FUNCTION,
 	NL80211_CMD_DEL_NAN_FUNCTION,
+	NL80211_CMD_CHANGE_NAN_CONFIG,
 
 	/* add new commands above here */
 
@@ -1910,12 +1915,14 @@ enum nl80211_commands {
  *	used to pull the stored data for mesh peer in power save state.
  *
  * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by
- *	%NL80211_CMD_START_NAN. Its type is u8 and it can't be 0.
+ *	%NL80211_CMD_START_NAN and optionally with
+ *	%NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0.
  *	Also, values 1 and 255 are reserved for certification purposes and
  *	should not be used during a normal device operation.
  * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see
  *	&enum nl80211_nan_dual_band_conf). This attribute is used with
- *	%NL80211_CMD_START_NAN.
+ *	%NL80211_CMD_START_NAN and optionally with
+ *	%NL80211_CMD_CHANGE_NAN_CONFIG.
  * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
  *	&enum nl80211_nan_func_attributes for description of this nested
  *	attribute.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0eca59ccd685..c0b5ae4af2d8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10919,6 +10919,40 @@ static int nl80211_nan_del_func(struct sk_buff *skb,
 	return 0;
 }
 
+static int nl80211_nan_change_config(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct cfg80211_nan_conf conf = {};
+	u32 changed = 0;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
+		conf.master_pref =
+			nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+		if (conf.master_pref <= 1 || conf.master_pref == 255)
+			return -EINVAL;
+
+		changed |= CFG80211_NAN_CONF_CHANGED_PREF;
+	}
+
+	if (info->attrs[NL80211_ATTR_NAN_DUAL]) {
+		conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+		changed |= CFG80211_NAN_CONF_CHANGED_DUAL;
+	}
+
+	if (!changed)
+		return -EINVAL;
+
+	return rdev_nan_change_conf(rdev, wdev, &conf, changed);
+}
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
@@ -12292,6 +12326,14 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
+		.doit = nl80211_nan_change_config,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 	{
 		.cmd = NL80211_CMD_SET_MCAST_RATE,
 		.doit = nl80211_set_mcast_rate,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 98c4c3bdcb11..11cf83c8ad4f 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -928,6 +928,23 @@ static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
+static inline int
+rdev_nan_change_conf(struct cfg80211_registered_device *rdev,
+		     struct wireless_dev *wdev,
+		     struct cfg80211_nan_conf *conf, u32 changes)
+{
+	int ret;
+
+	trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes);
+	if (rdev->ops->nan_change_conf)
+		ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf,
+						 changes);
+	else
+		ret = -ENOTSUPP;
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
 				   struct cfg80211_acl_data *params)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 56089843d619..a3d0a91b1e09 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1911,6 +1911,30 @@ TRACE_EVENT(rdev_start_nan,
 		  __entry->dual)
 );
 
+TRACE_EVENT(rdev_nan_change_conf,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 struct cfg80211_nan_conf *conf, u32 changes),
+	TP_ARGS(wiphy, wdev, conf, changes),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual);
+		__field(u32, changes);
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+		__entry->changes = changes;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+		  ", master preference: %u, dual: %d, changes: %x",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+		  __entry->dual, __entry->changes)
+);
+
 DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan,
 	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
 	TP_ARGS(wiphy, wdev)
-- 
cgit v1.2.3


From 50bcd31d9992e99c231820f5276e70346cbfbc51 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Tue, 20 Sep 2016 17:31:17 +0300
Subject: cfg80211: provide a function to report a match for NAN

Provide a function the driver can call to report a match.
This will send the event to the user space.
If the NAN instance is tied to the owner, the notifications will be
sent to the socket that started the NAN interface only.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 37 ++++++++++++++++++++
 include/uapi/linux/nl80211.h | 31 +++++++++++++++++
 net/wireless/nl80211.c       | 80 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8574a57e19ba..5481664b5389 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5694,6 +5694,43 @@ wiphy_ext_feature_isset(struct wiphy *wiphy,
  */
 void cfg80211_free_nan_func(struct cfg80211_nan_func *f);
 
+/**
+ * struct cfg80211_nan_match_params - NAN match parameters
+ * @type: the type of the function that triggered a match. If it is
+ *	 %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber.
+ *	 If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery
+ *	 result.
+ *	 If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up.
+ * @inst_id: the local instance id
+ * @peer_inst_id: the instance id of the peer's function
+ * @addr: the MAC address of the peer
+ * @info_len: the length of the &info
+ * @info: the Service Specific Info from the peer (if any)
+ * @cookie: unique identifier of the corresponding function
+ */
+struct cfg80211_nan_match_params {
+	enum nl80211_nan_function_type type;
+	u8 inst_id;
+	u8 peer_inst_id;
+	const u8 *addr;
+	u8 info_len;
+	const u8 *info;
+	u64 cookie;
+};
+
+/**
+ * cfg80211_nan_match - report a match for a NAN function.
+ * @wdev: the wireless device reporting the match
+ * @match: match notification parameters
+ * @gfp: allocation flags
+ *
+ * This function reports that the a NAN function had a match. This
+ * can be a subscribe that had a match or a solicited publish that
+ * was sent. It can also be a follow up that was received.
+ */
+void cfg80211_nan_match(struct wireless_dev *wdev,
+			struct cfg80211_nan_match_params *match, gfp_t gfp);
+
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9c9c0c352873..995bf802d604 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -48,6 +48,7 @@
 #define NL80211_MULTICAST_GROUP_REG		"regulatory"
 #define NL80211_MULTICAST_GROUP_MLME		"mlme"
 #define NL80211_MULTICAST_GROUP_VENDOR		"vendor"
+#define NL80211_MULTICAST_GROUP_NAN		"nan"
 #define NL80211_MULTICAST_GROUP_TESTMODE	"testmode"
 
 /**
@@ -866,6 +867,9 @@
  *	must be operational (%NL80211_CMD_START_NAN was executed).
  *	It must contain at least one of the following attributes:
  *	%NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL.
+ * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported.
+ *	This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and
+ *	%NL80211_ATTR_COOKIE.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -1060,6 +1064,7 @@ enum nl80211_commands {
 	NL80211_CMD_ADD_NAN_FUNCTION,
 	NL80211_CMD_DEL_NAN_FUNCTION,
 	NL80211_CMD_CHANGE_NAN_CONFIG,
+	NL80211_CMD_NAN_MATCH,
 
 	/* add new commands above here */
 
@@ -1926,6 +1931,8 @@ enum nl80211_commands {
  * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
  *	&enum nl80211_nan_func_attributes for description of this nested
  *	attribute.
+ * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
+ *	See &enum nl80211_nan_match_attributes.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2324,6 +2331,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_NAN_MASTER_PREF,
 	NL80211_ATTR_NAN_DUAL,
 	NL80211_ATTR_NAN_FUNC,
+	NL80211_ATTR_NAN_MATCH,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -5074,4 +5082,27 @@ enum nl80211_nan_srf_attributes {
 	NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1,
 };
 
+/**
+ * enum nl80211_nan_match_attributes - NAN match attributes
+ * @__NL80211_NAN_MATCH_INVALID: invalid
+ * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the
+ *	match. This is a nested attribute.
+ *	See &enum nl80211_nan_func_attributes.
+ * @NL80211_NAN_MATCH_FUNC_PEER: the peer function
+ *	that caused the match. This is a nested attribute.
+ *	See &enum nl80211_nan_func_attributes.
+ *
+ * @NUM_NL80211_NAN_MATCH_ATTR: internal
+ * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute
+ */
+enum nl80211_nan_match_attributes {
+	__NL80211_NAN_MATCH_INVALID,
+	NL80211_NAN_MATCH_FUNC_LOCAL,
+	NL80211_NAN_MATCH_FUNC_PEER,
+
+	/* keep last */
+	NUM_NL80211_NAN_MATCH_ATTR,
+	NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c0b5ae4af2d8..0bbd9ed28318 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -56,6 +56,7 @@ enum nl80211_multicast_groups {
 	NL80211_MCGRP_REGULATORY,
 	NL80211_MCGRP_MLME,
 	NL80211_MCGRP_VENDOR,
+	NL80211_MCGRP_NAN,
 	NL80211_MCGRP_TESTMODE /* keep last - ifdef! */
 };
 
@@ -65,6 +66,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = {
 	[NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG },
 	[NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME },
 	[NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR },
+	[NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN },
 #ifdef CONFIG_NL80211_TESTMODE
 	[NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE }
 #endif
@@ -10953,6 +10955,84 @@ static int nl80211_nan_change_config(struct sk_buff *skb,
 	return rdev_nan_change_conf(rdev, wdev, &conf, changed);
 }
 
+void cfg80211_nan_match(struct wireless_dev *wdev,
+			struct cfg80211_nan_match_params *match, gfp_t gfp)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct nlattr *match_attr, *local_func_attr, *peer_func_attr;
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+					 wdev->netdev->ifindex)) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie,
+			      NL80211_ATTR_PAD) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr))
+		goto nla_put_failure;
+
+	match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH);
+	if (!match_attr)
+		goto nla_put_failure;
+
+	local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL);
+	if (!local_func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, local_func_attr);
+
+	peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER);
+	if (!peer_func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) ||
+	    nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id))
+		goto nla_put_failure;
+
+	if (match->info && match->info_len &&
+	    nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len,
+		    match->info))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, peer_func_attr);
+	nla_nest_end(msg, match_attr);
+	genlmsg_end(msg, hdr);
+
+	if (!wdev->owner_nlportid)
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					msg, 0, NL80211_MCGRP_NAN, gfp);
+	else
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+				wdev->owner_nlportid);
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_match);
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
-- 
cgit v1.2.3


From 368e5a7b4ecb71b3d347799cb9351b0dce5dec70 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Tue, 20 Sep 2016 17:31:18 +0300
Subject: cfg80211: Provide an API to report NAN function termination

Provide a function that reports NAN DE function termination. The function
may be terminated due to one of the following reasons: user request,
ttl expiration or failure.
If the NAN instance is tied to the owner, the notification will be
sent to the socket that started the NAN interface only

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 16 ++++++++++++
 include/uapi/linux/nl80211.h | 18 +++++++++++++
 net/wireless/nl80211.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5481664b5389..fe78f02a242e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5731,6 +5731,22 @@ struct cfg80211_nan_match_params {
 void cfg80211_nan_match(struct wireless_dev *wdev,
 			struct cfg80211_nan_match_params *match, gfp_t gfp);
 
+/**
+ * cfg80211_nan_func_terminated - notify about NAN function termination.
+ *
+ * @wdev: the wireless device reporting the match
+ * @inst_id: the local instance id
+ * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*)
+ * @cookie: unique NAN function identifier
+ * @gfp: allocation flags
+ *
+ * This function reports that the a NAN function is terminated.
+ */
+void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
+				  u8 inst_id,
+				  enum nl80211_nan_func_term_reason reason,
+				  u64 cookie, gfp_t gfp);
+
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 995bf802d604..56368e9b4622 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -863,6 +863,9 @@
  *	the response to this command.
  *	Look at %NL80211_ATTR_SOCKET_OWNER as well.
  * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
+ *	This command is also used as a notification sent when a NAN function is
+ *	terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID
+ *	and %NL80211_ATTR_COOKIE attributes.
  * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN
  *	must be operational (%NL80211_CMD_START_NAN was executed).
  *	It must contain at least one of the following attributes:
@@ -4985,6 +4988,21 @@ enum nl80211_nan_publish_type {
 	NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1,
 };
 
+/**
+ * enum nl80211_nan_func_term_reason - NAN functions termination reason
+ *
+ * Defines termination reasons of a NAN function
+ *
+ * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user
+ * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout
+ * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored
+ */
+enum nl80211_nan_func_term_reason {
+	NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST,
+	NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED,
+	NL80211_NAN_FUNC_TERM_REASON_ERROR,
+};
+
 #define NL80211_NAN_FUNC_SERVICE_ID_LEN 6
 #define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff
 #define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0bbd9ed28318..92eb6f0b9f3d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11033,6 +11033,66 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(cfg80211_nan_match);
 
+void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
+				  u8 inst_id,
+				  enum nl80211_nan_func_term_reason reason,
+				  u64 cookie, gfp_t gfp)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct sk_buff *msg;
+	struct nlattr *func_attr;
+	void *hdr;
+
+	if (WARN_ON(!inst_id))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+					 wdev->netdev->ifindex)) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	if (!func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) ||
+	    nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, func_attr);
+	genlmsg_end(msg, hdr);
+
+	if (!wdev->owner_nlportid)
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					msg, 0, NL80211_MCGRP_NAN, gfp);
+	else
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+				wdev->owner_nlportid);
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_func_terminated);
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
-- 
cgit v1.2.3


From 5e940c1dd3c1f7561924954eecee956ec277a79b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Sat, 1 Oct 2016 07:32:32 +0200
Subject: fuse: handle killpriv in userspace fs

Only userspace filesystem can do the killing of suid/sgid without races.
So introduce an INIT flag and negotiate support for this.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dir.c             | 44 +++++++++++++++++++++++++++-----------------
 fs/fuse/fuse_i.h          |  3 +++
 fs/fuse/inode.c           |  4 +++-
 include/uapi/linux/fuse.h |  7 ++++++-
 4 files changed, 39 insertions(+), 19 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7a690ed6a75..7cb68b18eb3f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1703,6 +1703,7 @@ error:
 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 {
 	struct inode *inode = d_inode(entry);
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
 	int ret;
 
@@ -1710,27 +1711,36 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 		return -EACCES;
 
 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
-		int kill;
-
 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
 				    ATTR_MODE);
+
 		/*
-		 * ia_mode calculation may have used stale i_mode.  Refresh and
-		 * recalculate.
+		 * The only sane way to reliably kill suid/sgid is to do it in
+		 * the userspace filesystem
+		 *
+		 * This should be done on write(), truncate() and chown().
 		 */
-		ret = fuse_do_getattr(inode, NULL, file);
-		if (ret)
-			return ret;
-
-		attr->ia_mode = inode->i_mode;
-		kill = should_remove_suid(entry);
-		if (kill & ATTR_KILL_SUID) {
-			attr->ia_valid |= ATTR_MODE;
-			attr->ia_mode &= ~S_ISUID;
-		}
-		if (kill & ATTR_KILL_SGID) {
-			attr->ia_valid |= ATTR_MODE;
-			attr->ia_mode &= ~S_ISGID;
+		if (!fc->handle_killpriv) {
+			int kill;
+
+			/*
+			 * ia_mode calculation may have used stale i_mode.
+			 * Refresh and recalculate.
+			 */
+			ret = fuse_do_getattr(inode, NULL, file);
+			if (ret)
+				return ret;
+
+			attr->ia_mode = inode->i_mode;
+			kill = should_remove_suid(entry);
+			if (kill & ATTR_KILL_SUID) {
+				attr->ia_valid |= ATTR_MODE;
+				attr->ia_mode &= ~S_ISUID;
+			}
+			if (kill & ATTR_KILL_SGID) {
+				attr->ia_valid |= ATTR_MODE;
+				attr->ia_mode &= ~S_ISGID;
+			}
 		}
 	}
 	if (!attr->ia_valid)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6db54d0bd81b..9940a648c985 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -547,6 +547,9 @@ struct fuse_conn {
 	/** allow parallel lookups and readdir (default is serialized) */
 	unsigned parallel_dirops:1;
 
+	/** handle fs handles killing suid/sgid/cap on write/chown/trunc */
+	unsigned handle_killpriv:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1e535f31fed0..b965934939cb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -910,6 +910,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->writeback_cache = 1;
 			if (arg->flags & FUSE_PARALLEL_DIROPS)
 				fc->parallel_dirops = 1;
+			if (arg->flags & FUSE_HANDLE_KILLPRIV)
+				fc->handle_killpriv = 1;
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
 		} else {
@@ -941,7 +943,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
-		FUSE_PARALLEL_DIROPS;
+		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 27e17363263a..14ca2f10d354 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -108,6 +108,9 @@
  *
  *  7.25
  *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_HANDLE_KILLPRIV
  */
 
 #ifndef _LINUX_FUSE_H
@@ -143,7 +146,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 25
+#define FUSE_KERNEL_MINOR_VERSION 26
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -238,6 +241,7 @@ struct fuse_file_lock {
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
  * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -258,6 +262,7 @@ struct fuse_file_lock {
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
 #define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_HANDLE_KILLPRIV	(1 << 19)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 60bcc88ad185d512f5718f2f8dcccb483ea8fb73 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Mon, 29 Aug 2016 08:46:37 -0500
Subject: fuse: Add posix ACL support

Add a new INIT flag, FUSE_POSIX_ACL, for negotiating ACL support with
userspace.  When it is set in the INIT response, ACL support will be
enabled.  ACL support also implies "default_permissions".

When ACL support is enabled, the kernel will cache and have responsibility
for enforcing ACLs.  ACL xattrs will be passed to userspace, which is
responsible for updating the ACLs in the filesystem, keeping the file mode
in sync, and inheritance of default ACLs when new filesystem nodes are
created.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/Kconfig           |  1 +
 fs/fuse/Makefile          |  2 +-
 fs/fuse/acl.c             | 99 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/dir.c             | 16 ++++++++
 fs/fuse/fuse_i.h          | 14 +++++++
 fs/fuse/inode.c           |  9 ++++-
 fs/fuse/xattr.c           | 18 ++++++---
 include/uapi/linux/fuse.h |  3 ++
 8 files changed, 155 insertions(+), 7 deletions(-)
 create mode 100644 fs/fuse/acl.c

(limited to 'include/uapi')

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 1b2f6c2c3aaf..76f09ce7e5b2 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -1,5 +1,6 @@
 config FUSE_FS
 	tristate "FUSE (Filesystem in Userspace) support"
+	select FS_POSIX_ACL
 	help
 	  With FUSE it is possible to implement a fully functional filesystem
 	  in a userspace program.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 448aa27ada00..60da84a86dab 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
 obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
 
-fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o
+fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
new file mode 100644
index 000000000000..ec85765502f1
--- /dev/null
+++ b/fs/fuse/acl.c
@@ -0,0 +1,99 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+
+struct posix_acl *fuse_get_acl(struct inode *inode, int type)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	int size;
+	const char *name;
+	void *value = NULL;
+	struct posix_acl *acl;
+
+	if (!fc->posix_acl || fc->no_getxattr)
+		return NULL;
+
+	if (type == ACL_TYPE_ACCESS)
+		name = XATTR_NAME_POSIX_ACL_ACCESS;
+	else if (type == ACL_TYPE_DEFAULT)
+		name = XATTR_NAME_POSIX_ACL_DEFAULT;
+	else
+		return ERR_PTR(-EOPNOTSUPP);
+
+	value = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!value)
+		return ERR_PTR(-ENOMEM);
+	size = fuse_getxattr(inode, name, value, PAGE_SIZE);
+	if (size > 0)
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
+	else if ((size == 0) || (size == -ENODATA) ||
+		 (size == -EOPNOTSUPP && fc->no_getxattr))
+		acl = NULL;
+	else if (size == -ERANGE)
+		acl = ERR_PTR(-E2BIG);
+	else
+		acl = ERR_PTR(size);
+
+	kfree(value);
+	return acl;
+}
+
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	const char *name;
+	int ret;
+
+	if (!fc->posix_acl || fc->no_setxattr)
+		return -EOPNOTSUPP;
+
+	if (type == ACL_TYPE_ACCESS)
+		name = XATTR_NAME_POSIX_ACL_ACCESS;
+	else if (type == ACL_TYPE_DEFAULT)
+		name = XATTR_NAME_POSIX_ACL_DEFAULT;
+	else
+		return -EINVAL;
+
+	if (acl) {
+		/*
+		 * Fuse userspace is responsible for updating access
+		 * permissions in the inode, if needed. fuse_setxattr
+		 * invalidates the inode attributes, which will force
+		 * them to be refreshed the next time they are used,
+		 * and it also updates i_ctime.
+		 */
+		size_t size = posix_acl_xattr_size(acl->a_count);
+		void *value;
+
+		if (size > PAGE_SIZE)
+			return -E2BIG;
+
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+
+		ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+		if (ret < 0) {
+			kfree(value);
+			return ret;
+		}
+
+		ret = fuse_setxattr(inode, name, value, size, 0);
+		kfree(value);
+	} else {
+		ret = fuse_removexattr(inode, name);
+	}
+	forget_all_cached_acls(inode);
+	fuse_invalidate_attr(inode);
+
+	return ret;
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 7cb68b18eb3f..3076f48d86a6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,7 @@
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl.h>
 
 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
 {
@@ -244,6 +245,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
 			goto invalid;
 
+		forget_all_cached_acls(inode);
 		fuse_change_attributes(inode, &outarg.attr,
 				       entry_attr_timeout(&outarg),
 				       attr_version);
@@ -918,6 +920,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 
 	if (time_before64(fi->i_time, get_jiffies_64())) {
 		r = true;
+		forget_all_cached_acls(inode);
 		err = fuse_do_getattr(inode, stat, file);
 	} else {
 		r = false;
@@ -1065,6 +1068,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
 	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;
 
+	forget_all_cached_acls(inode);
 	return fuse_do_getattr(inode, NULL, NULL);
 }
 
@@ -1234,6 +1238,7 @@ retry:
 		fi->nlookup++;
 		spin_unlock(&fc->lock);
 
+		forget_all_cached_acls(inode);
 		fuse_change_attributes(inode, &o->attr,
 				       entry_attr_timeout(o),
 				       attr_version);
@@ -1748,6 +1753,13 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 
 	ret = fuse_do_setattr(inode, attr, file);
 	if (!ret) {
+		/*
+		 * If filesystem supports acls it may have updated acl xattrs in
+		 * the filesystem, so forget cached acls for the inode.
+		 */
+		if (fc->posix_acl)
+			forget_all_cached_acls(inode);
+
 		/* Directory mode changed, may need to revalidate access */
 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
 			fuse_invalidate_entry_cache(entry);
@@ -1785,6 +1797,8 @@ static const struct inode_operations fuse_dir_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= fuse_listxattr,
 	.removexattr	= generic_removexattr,
+	.get_acl	= fuse_get_acl,
+	.set_acl	= fuse_set_acl,
 };
 
 static const struct file_operations fuse_dir_operations = {
@@ -1806,6 +1820,8 @@ static const struct inode_operations fuse_common_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= fuse_listxattr,
 	.removexattr	= generic_removexattr,
+	.get_acl	= fuse_get_acl,
+	.set_acl	= fuse_set_acl,
 };
 
 static const struct inode_operations fuse_symlink_inode_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 9940a648c985..e8d96ec22533 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -23,6 +23,7 @@
 #include <linux/poll.h>
 #include <linux/workqueue.h>
 #include <linux/kref.h>
+#include <linux/xattr.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -627,6 +628,9 @@ struct fuse_conn {
 	/** Is lseek not implemented by fs? */
 	unsigned no_lseek:1;
 
+	/** Does the filesystem support posix acls? */
+	unsigned posix_acl:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
@@ -971,7 +975,17 @@ void fuse_set_initialized(struct fuse_conn *fc);
 void fuse_unlock_inode(struct inode *inode);
 void fuse_lock_inode(struct inode *inode);
 
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+		  size_t size, int flags);
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+		      size_t size);
 ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
+int fuse_removexattr(struct inode *inode, const char *name);
 extern const struct xattr_handler *fuse_xattr_handlers[];
+extern const struct xattr_handler *fuse_acl_xattr_handlers[];
+
+struct posix_acl;
+struct posix_acl *fuse_get_acl(struct inode *inode, int type);
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b965934939cb..16ac9d86c507 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -20,6 +20,7 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/exportfs.h>
+#include <linux/posix_acl.h>
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -340,6 +341,7 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 		return -ENOENT;
 
 	fuse_invalidate_attr(inode);
+	forget_all_cached_acls(inode);
 	if (offset >= 0) {
 		pg_start = offset >> PAGE_SHIFT;
 		if (len <= 0)
@@ -914,6 +916,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->handle_killpriv = 1;
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
+			if ((arg->flags & FUSE_POSIX_ACL)) {
+				fc->flags |= FUSE_DEFAULT_PERMISSIONS;
+				fc->posix_acl = 1;
+				fc->sb->s_xattr = fuse_acl_xattr_handlers;
+			}
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -943,7 +950,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
-		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV;
+		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index e22980f0a9e2..04b097f29d8a 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -9,9 +9,10 @@
 #include "fuse_i.h"
 
 #include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
 
-static int fuse_setxattr(struct inode *inode, const char *name,
-			 const void *value, size_t size, int flags)
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+		  size_t size, int flags)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	FUSE_ARGS(args);
@@ -45,8 +46,8 @@ static int fuse_setxattr(struct inode *inode, const char *name,
 	return err;
 }
 
-static ssize_t fuse_getxattr(struct inode *inode, const char *name,
-			     void *value, size_t size)
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+		      size_t size)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	FUSE_ARGS(args);
@@ -147,7 +148,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 	return ret;
 }
 
-static int fuse_removexattr(struct inode *inode, const char *name)
+int fuse_removexattr(struct inode *inode, const char *name)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	FUSE_ARGS(args);
@@ -201,3 +202,10 @@ const struct xattr_handler *fuse_xattr_handlers[] = {
 	&fuse_xattr_handler,
 	NULL
 };
+
+const struct xattr_handler *fuse_acl_xattr_handlers[] = {
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+	&fuse_xattr_handler,
+	NULL
+};
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 14ca2f10d354..42fa977e3b14 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -111,6 +111,7 @@
  *
  *  7.26
  *  - add FUSE_HANDLE_KILLPRIV
+ *  - add FUSE_POSIX_ACL
  */
 
 #ifndef _LINUX_FUSE_H
@@ -242,6 +243,7 @@ struct fuse_file_lock {
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
  * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
  * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
+ * FUSE_POSIX_ACL: filesystem supports posix acls
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -263,6 +265,7 @@ struct fuse_file_lock {
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
 #define FUSE_PARALLEL_DIROPS    (1 << 18)
 #define FUSE_HANDLE_KILLPRIV	(1 << 19)
+#define FUSE_POSIX_ACL		(1 << 20)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 0a6eab8bd4e0120d49511acbb294797d96ef9e4a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 3 Oct 2016 09:11:13 -0700
Subject: vfs: support FS_XFLAG_COWEXTSIZE and get/set of CoW extent size hint

Introduce XFLAGs for the new XFS CoW extent size hint, and actually
plumb the CoW extent size hint into the fsxattr structure.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 include/uapi/linux/fs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3b00f7c8943f..cf398764b19e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -157,7 +157,8 @@ struct fsxattr {
 	__u32		fsx_extsize;	/* extsize field value (get/set)*/
 	__u32		fsx_nextents;	/* nextents field value (get)	*/
 	__u32		fsx_projid;	/* project identifier (get/set) */
-	unsigned char	fsx_pad[12];
+	__u32		fsx_cowextsize;	/* CoW extsize field value (get/set)*/
+	unsigned char	fsx_pad[8];
 };
 
 /*
@@ -178,6 +179,7 @@ struct fsxattr {
 #define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
 #define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
 #define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
 #define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /* the read-only stuff doesn't really belong here, but any other place is
-- 
cgit v1.2.3


From 71be6b4942dd64bc17728f82f787be98fd8afed7 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 3 Oct 2016 09:11:14 -0700
Subject: vfs: add a FALLOC_FL_UNSHARE mode to fallocate to unshare a range of
 blocks

Add a new fallocate mode flag that explicitly unshares blocks on
filesystems that support such features.  The new flag can only
be used with an allocate-mode fallocate call.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/open.c                   |  5 +++++
 include/linux/falloc.h      |  3 ++-
 include/uapi/linux/falloc.h | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/fs/open.c b/fs/open.c
index 4fd6e256f4f4..d58525dda28d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -256,6 +256,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    (mode & ~FALLOC_FL_INSERT_RANGE))
 		return -EINVAL;
 
+	/* Unshare range should only be used with allocate mode. */
+	if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
+	    (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
+		return -EINVAL;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
 
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 996111000a8c..7494dc67c66f 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -25,6 +25,7 @@ struct space_resv {
 					 FALLOC_FL_PUNCH_HOLE |		\
 					 FALLOC_FL_COLLAPSE_RANGE |	\
 					 FALLOC_FL_ZERO_RANGE |		\
-					 FALLOC_FL_INSERT_RANGE)
+					 FALLOC_FL_INSERT_RANGE |	\
+					 FALLOC_FL_UNSHARE_RANGE)
 
 #endif /* _FALLOC_H_ */
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 3e445a760f14..b075f601919b 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -58,4 +58,22 @@
  */
 #define FALLOC_FL_INSERT_RANGE		0x20
 
+/*
+ * FALLOC_FL_UNSHARE_RANGE is used to unshare shared blocks within the
+ * file size without overwriting any existing data. The purpose of this
+ * call is to preemptively reallocate any blocks that are subject to
+ * copy-on-write.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to filesystem
+ * block size boundaries, but this boundary may be larger or smaller
+ * depending on the filesystem and/or the configuration of the filesystem
+ * or file.
+ *
+ * This flag can only be used with allocate-mode fallocate, which is
+ * to say that it cannot be used with the punch, zero, collapse, or
+ * insert range modes.
+ */
+#define FALLOC_FL_UNSHARE_RANGE		0x40
+
 #endif /* _UAPI_FALLOC_H_ */
-- 
cgit v1.2.3


From 6675df311db87aa2107a04ef97e19420953cbace Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 22 Sep 2016 17:24:22 -0700
Subject: Btrfs: catch invalid free space trees
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are two separate issues that can lead to corrupted free space
trees.

1. The free space tree bitmaps had an endianness issue on big-endian
   systems which is fixed by an earlier patch in this series.
2. btrfs-progs before v4.7.3 modified filesystems without updating the
   free space tree.

To catch both of these issues at once, we need to force the free space
tree to be rebuilt. To do so, add a FREE_SPACE_TREE_VALID compat_ro bit.
If the bit isn't set, we know that it was either produced by a broken
big-endian kernel or may have been corrupted by btrfs-progs.

This also provides us with a way to add rudimentary read-write support
for the free space tree to btrfs-progs: it can just clear this bit and
have the kernel rebuild the free space tree.

Cc: stable@vger.kernel.org # 4.5+
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h           |  3 ++-
 fs/btrfs/disk-io.c         |  9 +++++++++
 fs/btrfs/free-space-tree.c |  2 ++
 include/uapi/linux/btrfs.h | 12 +++++++++++-
 4 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 33fe03551105..791e47ce9d27 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -251,7 +251,8 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
 
 #define BTRFS_FEATURE_COMPAT_RO_SUPP			\
-	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
+	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
 
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c0bfc6ce5f06..3dede6d53bad 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2566,6 +2566,7 @@ int open_ctree(struct super_block *sb,
 	int num_backups_tried = 0;
 	int backup_index = 0;
 	int max_active;
+	int clear_free_space_tree = 0;
 
 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -3131,6 +3132,14 @@ retry_root_backup:
 
 	if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
 	    btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+		clear_free_space_tree = 1;
+	} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+		   !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
+		btrfs_warn(fs_info, "free space tree is invalid");
+		clear_free_space_tree = 1;
+	}
+
+	if (clear_free_space_tree) {
 		btrfs_info(fs_info, "clearing free space tree");
 		ret = btrfs_clear_free_space_tree(fs_info);
 		if (ret) {
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 8fd85bfbe2da..ea605ffd0e03 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1182,6 +1182,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
 	}
 
 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 	fs_info->creating_free_space_tree = 0;
 
 	ret = btrfs_commit_transaction(trans, tree_root);
@@ -1250,6 +1251,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
 		return PTR_ERR(trans);
 
 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
 	fs_info->free_space_root = NULL;
 
 	ret = clear_free_space_tree(trans, free_space_root);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index ac5eacd3055b..db4c253f8011 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -239,7 +239,17 @@ struct btrfs_ioctl_fs_info_args {
  * Used by:
  * struct btrfs_ioctl_feature_flags
  */
-#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE	(1ULL << 0)
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE		(1ULL << 0)
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3).  If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID	(1ULL << 1)
 
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
-- 
cgit v1.2.3


From 58f0f9f75c1b94dabbfc3daa333a4e68536b0a42 Mon Sep 17 00:00:00 2001
From: Emilio López <emilio.lopez@collabora.co.uk>
Date: Tue, 27 Sep 2016 11:31:42 -0300
Subject: uapi: add missing install of sync_file.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of the sync framework destaging, the sync_file.h header
was moved, but an entry was not added on Kbuild to install it.
This patch resolves this omission so that "make headers_install"
installs this header.

Fixes: 460bfc41fd52 ("dma-buf/sync_file: de-stage sync_file headers")
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Signed-off-by: Emilio López <emilio.lopez@collabora.co.uk>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20160927143142.8975-1-emilio.lopez@collabora.co.uk
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea2702f..407ca0d7a938 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -396,6 +396,7 @@ header-y += string.h
 header-y += suspend_ioctls.h
 header-y += swab.h
 header-y += synclink.h
+header-y += sync_file.h
 header-y += sysctl.h
 header-y += sysinfo.h
 header-y += target_core_user.h
-- 
cgit v1.2.3


From 47adf2f4f5805d075359fe6cbe3437612f7d4a34 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 28 Aug 2016 11:28:44 +0300
Subject: IB/uverbs: Expose RSS related capabilities

Query RSS related attributes and return them to user-space via the
extended query device uverbs command.

It includes both direct ones (i.e. struct ib_uverbs_rss_caps) and
max_wq_type_rq which may be used in both RSS and non RSS flows.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 17 +++++++++++++++++
 include/uapi/rdma/ib_user_verbs.h    | 14 ++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index fe784a908817..19c1ebf596ad 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -4173,6 +4173,23 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 	resp.device_cap_flags_ex = attr.device_cap_flags;
 	resp.response_length += sizeof(resp.device_cap_flags_ex);
+
+	if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
+		goto end;
+
+	resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
+	resp.rss_caps.max_rwq_indirection_tables =
+		attr.rss_caps.max_rwq_indirection_tables;
+	resp.rss_caps.max_rwq_indirection_table_size =
+		attr.rss_caps.max_rwq_indirection_table_size;
+
+	resp.response_length += sizeof(resp.rss_caps);
+
+	if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
+		goto end;
+
+	resp.max_wq_type_rq = attr.max_wq_type_rq;
+	resp.response_length += sizeof(resp.max_wq_type_rq);
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
 	return err;
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 7f035f4b53b0..a9ebb477dc77 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -224,6 +224,17 @@ struct ib_uverbs_odp_caps {
 	__u32 reserved;
 };
 
+struct ib_uverbs_rss_caps {
+	/* Corresponding bit will be set if qp type from
+	 * 'enum ib_qp_type' is supported, e.g.
+	 * supported_qpts |= 1 << IB_QPT_UD
+	 */
+	__u32 supported_qpts;
+	__u32 max_rwq_indirection_tables;
+	__u32 max_rwq_indirection_table_size;
+	__u32 reserved;
+};
+
 struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_query_device_resp base;
 	__u32 comp_mask;
@@ -232,6 +243,9 @@ struct ib_uverbs_ex_query_device_resp {
 	__u64 timestamp_mask;
 	__u64 hca_core_clock; /* in KHZ */
 	__u64 device_cap_flags_ex;
+	struct ib_uverbs_rss_caps rss_caps;
+	__u32  max_wq_type_rq;
+	__u32 reserved;
 };
 
 struct ib_uverbs_query_port {
-- 
cgit v1.2.3


From 989a3a8f91ba02f71b84ecde3b948388d8bf2200 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Tue, 30 Aug 2016 16:58:33 +0300
Subject: IB/uverbs: Add more fields to IPv4 flow specification

Add the following fields to IPv4 flow filter specification:
1. Type of Service
2. Time to Live
3. Flags
4. Protocol

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_verbs.h           | 11 +++++++++++
 include/uapi/rdma/ib_user_verbs.h |  4 ++++
 2 files changed, 15 insertions(+)

(limited to 'include/uapi')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4570d8844f63..990220b757f0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1648,9 +1648,20 @@ struct ib_flow_spec_ib {
 	struct ib_flow_ib_filter mask;
 };
 
+/* IPv4 header flags */
+enum ib_ipv4_flags {
+	IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */
+	IB_IPV4_MORE_FRAG = 0X4  /* For All fragmented packets except the
+				    last have this flag set */
+};
+
 struct ib_flow_ipv4_filter {
 	__be32	src_ip;
 	__be32	dst_ip;
+	u8	proto;
+	u8	tos;
+	u8	ttl;
+	u8	flags;
 	/* Must be last */
 	u8	real_sz[0];
 };
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index a9ebb477dc77..804f086835c6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -848,6 +848,10 @@ struct ib_uverbs_flow_spec_eth {
 struct ib_uverbs_flow_ipv4_filter {
 	__be32 src_ip;
 	__be32 dst_ip;
+	__u8	proto;
+	__u8	tos;
+	__u8	ttl;
+	__u8	flags;
 };
 
 struct ib_uverbs_flow_spec_ipv4 {
-- 
cgit v1.2.3


From a72c6a2b0e699fcbcf679b881d5af2683228ae98 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Tue, 30 Aug 2016 16:58:34 +0300
Subject: IB/core: Add more fields to IPv6 flow specification

Add the following fields to IPv6 flow filter specification:
1. Traffic Class
2. Flow Label
3. Next Header
4. Hop Limit

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 4 ++++
 include/rdma/ib_verbs.h              | 4 ++++
 include/uapi/rdma/ib_user_verbs.h    | 9 +++++++--
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b9fb256c25fa..cb3f515a2285 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3158,6 +3158,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 		ib_spec->size = sizeof(struct ib_flow_spec_ipv6);
 		memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz);
 		memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz);
+
+		if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) ||
+		    (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20))
+			return -EINVAL;
 		break;
 	case IB_FLOW_SPEC_TCP:
 	case IB_FLOW_SPEC_UDP:
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 990220b757f0..0cec4da51eb7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1676,6 +1676,10 @@ struct ib_flow_spec_ipv4 {
 struct ib_flow_ipv6_filter {
 	u8	src_ip[16];
 	u8	dst_ip[16];
+	__be32	flow_label;
+	u8	next_hdr;
+	u8	traffic_class;
+	u8	hop_limit;
 	/* Must be last */
 	u8	real_sz[0];
 };
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 804f086835c6..25225ebbc7d5 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -886,8 +886,13 @@ struct ib_uverbs_flow_spec_tcp_udp {
 };
 
 struct ib_uverbs_flow_ipv6_filter {
-	__u8 src_ip[16];
-	__u8 dst_ip[16];
+	__u8    src_ip[16];
+	__u8    dst_ip[16];
+	__be32	flow_label;
+	__u8	next_hdr;
+	__u8	traffic_class;
+	__u8	hop_limit;
+	__u8	reserved;
 };
 
 struct ib_uverbs_flow_spec_ipv6 {
-- 
cgit v1.2.3


From 3085e29e2f832cbf77ddeeffe715809a31254b5f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:11 +0300
Subject: IB/mlx5: Move and decouple user vendor structures

This patch decouples and moves vendors specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libmlx5) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                          |   1 +
 drivers/infiniband/hw/mlx5/cq.c      |   1 -
 drivers/infiniband/hw/mlx5/main.c    |   1 -
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  37 +++++
 drivers/infiniband/hw/mlx5/mr.c      |   1 -
 drivers/infiniband/hw/mlx5/qp.c      |   1 -
 drivers/infiniband/hw/mlx5/srq.c     |   1 -
 drivers/infiniband/hw/mlx5/user.h    | 288 -----------------------------------
 include/uapi/rdma/Kbuild             |   1 +
 include/uapi/rdma/mlx5-abi.h         | 249 ++++++++++++++++++++++++++++++
 10 files changed, 288 insertions(+), 293 deletions(-)
 delete mode 100644 drivers/infiniband/hw/mlx5/user.h
 create mode 100644 include/uapi/rdma/mlx5-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 87e23cda8dc1..e76efbc414cd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7819,6 +7819,7 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx5/core/
 F:	include/linux/mlx5/
+F:	include/uapi/rdma/mlx5-abi.h
 
 MELLANOX MLX5 IB driver
 M:	Matan Barak <matanb@mellanox.com>
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 35a9f718e669..1188fef08450 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -35,7 +35,6 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_cache.h>
 #include "mlx5_ib.h"
-#include "user.h"
 
 static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
 {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 29878aa716ee..f4160d56dc4f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -54,7 +54,6 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include <linux/mlx5/fs.h>
-#include "user.h"
 #include "mlx5_ib.h"
 
 #define DRIVER_NAME "mlx5_ib"
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 40fe1a65402b..1df8a67d4f02 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -44,6 +44,7 @@
 #include <linux/types.h>
 #include <linux/mlx5/transobj.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/mlx5-abi.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)				\
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
@@ -955,4 +956,40 @@ static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
 
 	return 0;
 }
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+				    struct mlx5_ib_create_qp *ucmd,
+				    int inlen,
+				    u32 *user_index)
+{
+	u8 cqe_version = ucontext->cqe_version;
+
+	if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+		return 0;
+
+	if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+	       !!cqe_version))
+		return -EINVAL;
+
+	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+				     struct mlx5_ib_create_srq *ucmd,
+				     int inlen,
+				     u32 *user_index)
+{
+	u8 cqe_version = ucontext->cqe_version;
+
+	if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+		return 0;
+
+	if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+	       !!cqe_version))
+		return -EINVAL;
+
+	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 873d81dbde41..d4ad672b905b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -40,7 +40,6 @@
 #include <rdma/ib_umem_odp.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_ib.h"
-#include "user.h"
 
 enum {
 	MAX_PENDING_REG_MR = 8,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 2ec88c649ac0..9d97a71a1335 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -35,7 +35,6 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_user_verbs.h>
 #include "mlx5_ib.h"
-#include "user.h"
 
 /* not supported currently */
 static int wq_signature;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index ed6ac52355f1..3857dbd9c956 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -38,7 +38,6 @@
 #include <rdma/ib_user_verbs.h>
 
 #include "mlx5_ib.h"
-#include "user.h"
 
 /* not supported currently */
 static int srq_signature;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
deleted file mode 100644
index 0e49d5b30a4c..000000000000
--- a/drivers/infiniband/hw/mlx5/user.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX5_IB_USER_H
-#define MLX5_IB_USER_H
-
-#include <linux/types.h>
-
-#include "mlx5_ib.h"
-
-enum {
-	MLX5_QP_FLAG_SIGNATURE		= 1 << 0,
-	MLX5_QP_FLAG_SCATTER_CQE	= 1 << 1,
-};
-
-enum {
-	MLX5_SRQ_FLAG_SIGNATURE		= 1 << 0,
-};
-
-enum {
-	MLX5_WQ_FLAG_SIGNATURE		= 1 << 0,
-};
-
-
-/* Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define MLX5_IB_UVERBS_ABI_VERSION	1
-
-/* Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct mlx5_ib_alloc_ucontext_req {
-	__u32	total_num_uuars;
-	__u32	num_low_latency_uuars;
-};
-
-struct mlx5_ib_alloc_ucontext_req_v2 {
-	__u32	total_num_uuars;
-	__u32	num_low_latency_uuars;
-	__u32	flags;
-	__u32	comp_mask;
-	__u8	max_cqe_version;
-	__u8	reserved0;
-	__u16	reserved1;
-	__u32	reserved2;
-};
-
-enum mlx5_ib_alloc_ucontext_resp_mask {
-	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
-};
-
-enum mlx5_user_cmds_supp_uhw {
-	MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
-};
-
-struct mlx5_ib_alloc_ucontext_resp {
-	__u32	qp_tab_size;
-	__u32	bf_reg_size;
-	__u32	tot_uuars;
-	__u32	cache_line_size;
-	__u16	max_sq_desc_sz;
-	__u16	max_rq_desc_sz;
-	__u32	max_send_wqebb;
-	__u32	max_recv_wr;
-	__u32	max_srq_recv_wr;
-	__u16	num_ports;
-	__u16	reserved1;
-	__u32	comp_mask;
-	__u32	response_length;
-	__u8	cqe_version;
-	__u8	cmds_supp_uhw;
-	__u16	reserved2;
-	__u64	hca_core_clock_offset;
-};
-
-struct mlx5_ib_alloc_pd_resp {
-	__u32	pdn;
-};
-
-struct mlx5_ib_tso_caps {
-	__u32 max_tso; /* Maximum tso payload size in bytes */
-
-	/* Corresponding bit will be set if qp type from
-	 * 'enum ib_qp_type' is supported, e.g.
-	 * supported_qpts |= 1 << IB_QPT_UD
-	 */
-	__u32 supported_qpts;
-};
-
-struct mlx5_ib_rss_caps {
-	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
-	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
-	__u8 reserved[7];
-};
-
-struct mlx5_ib_query_device_resp {
-	__u32	comp_mask;
-	__u32	response_length;
-	struct	mlx5_ib_tso_caps tso_caps;
-	struct	mlx5_ib_rss_caps rss_caps;
-};
-
-struct mlx5_ib_create_cq {
-	__u64	buf_addr;
-	__u64	db_addr;
-	__u32	cqe_size;
-	__u32	reserved; /* explicit padding (optional on i386) */
-};
-
-struct mlx5_ib_create_cq_resp {
-	__u32	cqn;
-	__u32	reserved;
-};
-
-struct mlx5_ib_resize_cq {
-	__u64	buf_addr;
-	__u16	cqe_size;
-	__u16	reserved0;
-	__u32	reserved1;
-};
-
-struct mlx5_ib_create_srq {
-	__u64	buf_addr;
-	__u64	db_addr;
-	__u32	flags;
-	__u32	reserved0; /* explicit padding (optional on i386) */
-	__u32	uidx;
-	__u32	reserved1;
-};
-
-struct mlx5_ib_create_srq_resp {
-	__u32	srqn;
-	__u32	reserved;
-};
-
-struct mlx5_ib_create_qp {
-	__u64	buf_addr;
-	__u64	db_addr;
-	__u32	sq_wqe_count;
-	__u32	rq_wqe_count;
-	__u32	rq_wqe_shift;
-	__u32	flags;
-	__u32	uidx;
-	__u32	reserved0;
-	__u64	sq_buf_addr;
-};
-
-/* RX Hash function flags */
-enum mlx5_rx_hash_function_flags {
-	MLX5_RX_HASH_FUNC_TOEPLITZ	= 1 << 0,
-};
-
-/*
- * RX Hash flags, these flags allows to set which incoming packet's field should
- * participates in RX Hash. Each flag represent certain packet's field,
- * when the flag is set the field that is represented by the flag will
- * participate in RX Hash calculation.
- * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
- * and *TCP and *UDP flags can't be enabled together on the same QP.
-*/
-enum mlx5_rx_hash_fields {
-	MLX5_RX_HASH_SRC_IPV4	= 1 << 0,
-	MLX5_RX_HASH_DST_IPV4	= 1 << 1,
-	MLX5_RX_HASH_SRC_IPV6	= 1 << 2,
-	MLX5_RX_HASH_DST_IPV6	= 1 << 3,
-	MLX5_RX_HASH_SRC_PORT_TCP	= 1 << 4,
-	MLX5_RX_HASH_DST_PORT_TCP	= 1 << 5,
-	MLX5_RX_HASH_SRC_PORT_UDP	= 1 << 6,
-	MLX5_RX_HASH_DST_PORT_UDP	= 1 << 7
-};
-
-struct mlx5_ib_create_qp_rss {
-	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
-	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
-	__u8 rx_key_len; /* valid only for Toeplitz */
-	__u8 reserved[6];
-	__u8 rx_hash_key[128]; /* valid only for Toeplitz */
-	__u32   comp_mask;
-	__u32   reserved1;
-};
-
-struct mlx5_ib_create_qp_resp {
-	__u32	uuar_index;
-};
-
-struct mlx5_ib_alloc_mw {
-	__u32	comp_mask;
-	__u8	num_klms;
-	__u8	reserved1;
-	__u16	reserved2;
-};
-
-struct mlx5_ib_create_wq {
-	__u64   buf_addr;
-	__u64   db_addr;
-	__u32   rq_wqe_count;
-	__u32   rq_wqe_shift;
-	__u32   user_index;
-	__u32   flags;
-	__u32   comp_mask;
-	__u32   reserved;
-};
-
-struct mlx5_ib_create_wq_resp {
-	__u32	response_length;
-	__u32	reserved;
-};
-
-struct mlx5_ib_create_rwq_ind_tbl_resp {
-	__u32	response_length;
-	__u32	reserved;
-};
-
-struct mlx5_ib_modify_wq {
-	__u32	comp_mask;
-	__u32	reserved;
-};
-
-static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
-				    struct mlx5_ib_create_qp *ucmd,
-				    int inlen,
-				    u32 *user_index)
-{
-	u8 cqe_version = ucontext->cqe_version;
-
-	if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
-	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
-		return 0;
-
-	if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
-	       !!cqe_version))
-		return -EINVAL;
-
-	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
-}
-
-static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
-				     struct mlx5_ib_create_srq *ucmd,
-				     int inlen,
-				     u32 *user_index)
-{
-	u8 cqe_version = ucontext->cqe_version;
-
-	if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
-	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
-		return 0;
-
-	if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
-	       !!cqe_version))
-		return -EINVAL;
-
-	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
-}
-#endif /* MLX5_IB_USER_H */
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 4edb0f2b4f9f..e9b0ca24f29d 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -7,3 +7,4 @@ header-y += rdma_netlink.h
 header-y += rdma_user_cm.h
 header-y += hfi/
 header-y += rdma_user_rxe.h
+header-y += mlx5-abi.h
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
new file mode 100644
index 000000000000..f5d0f4e83b59
--- /dev/null
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_ABI_USER_H
+#define MLX5_ABI_USER_H
+
+#include <linux/types.h>
+
+enum {
+	MLX5_QP_FLAG_SIGNATURE		= 1 << 0,
+	MLX5_QP_FLAG_SCATTER_CQE	= 1 << 1,
+};
+
+enum {
+	MLX5_SRQ_FLAG_SIGNATURE		= 1 << 0,
+};
+
+enum {
+	MLX5_WQ_FLAG_SIGNATURE		= 1 << 0,
+};
+
+/* Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX5_IB_UVERBS_ABI_VERSION	1
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx5_ib_alloc_ucontext_req {
+	__u32	total_num_uuars;
+	__u32	num_low_latency_uuars;
+};
+
+struct mlx5_ib_alloc_ucontext_req_v2 {
+	__u32	total_num_uuars;
+	__u32	num_low_latency_uuars;
+	__u32	flags;
+	__u32	comp_mask;
+	__u8	max_cqe_version;
+	__u8	reserved0;
+	__u16	reserved1;
+	__u32	reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
+};
+
+enum mlx5_user_cmds_supp_uhw {
+	MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+};
+
+struct mlx5_ib_alloc_ucontext_resp {
+	__u32	qp_tab_size;
+	__u32	bf_reg_size;
+	__u32	tot_uuars;
+	__u32	cache_line_size;
+	__u16	max_sq_desc_sz;
+	__u16	max_rq_desc_sz;
+	__u32	max_send_wqebb;
+	__u32	max_recv_wr;
+	__u32	max_srq_recv_wr;
+	__u16	num_ports;
+	__u16	reserved1;
+	__u32	comp_mask;
+	__u32	response_length;
+	__u8	cqe_version;
+	__u8	cmds_supp_uhw;
+	__u16	reserved2;
+	__u64	hca_core_clock_offset;
+};
+
+struct mlx5_ib_alloc_pd_resp {
+	__u32	pdn;
+};
+
+struct mlx5_ib_tso_caps {
+	__u32 max_tso; /* Maximum tso payload size in bytes */
+
+	/* Corresponding bit will be set if qp type from
+	 * 'enum ib_qp_type' is supported, e.g.
+	 * supported_qpts |= 1 << IB_QPT_UD
+	 */
+	__u32 supported_qpts;
+};
+
+struct mlx5_ib_rss_caps {
+	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
+	__u8 reserved[7];
+};
+
+struct mlx5_ib_query_device_resp {
+	__u32	comp_mask;
+	__u32	response_length;
+	struct	mlx5_ib_tso_caps tso_caps;
+	struct	mlx5_ib_rss_caps rss_caps;
+};
+
+struct mlx5_ib_create_cq {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	cqe_size;
+	__u32	reserved; /* explicit padding (optional on i386) */
+};
+
+struct mlx5_ib_create_cq_resp {
+	__u32	cqn;
+	__u32	reserved;
+};
+
+struct mlx5_ib_resize_cq {
+	__u64	buf_addr;
+	__u16	cqe_size;
+	__u16	reserved0;
+	__u32	reserved1;
+};
+
+struct mlx5_ib_create_srq {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	flags;
+	__u32	reserved0; /* explicit padding (optional on i386) */
+	__u32	uidx;
+	__u32	reserved1;
+};
+
+struct mlx5_ib_create_srq_resp {
+	__u32	srqn;
+	__u32	reserved;
+};
+
+struct mlx5_ib_create_qp {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u32	sq_wqe_count;
+	__u32	rq_wqe_count;
+	__u32	rq_wqe_shift;
+	__u32	flags;
+	__u32	uidx;
+	__u32	reserved0;
+	__u64	sq_buf_addr;
+};
+
+/* RX Hash function flags */
+enum mlx5_rx_hash_function_flags {
+	MLX5_RX_HASH_FUNC_TOEPLITZ	= 1 << 0,
+};
+
+/*
+ * RX Hash flags, these flags allows to set which incoming packet's field should
+ * participates in RX Hash. Each flag represent certain packet's field,
+ * when the flag is set the field that is represented by the flag will
+ * participate in RX Hash calculation.
+ * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP
+ * and *TCP and *UDP flags can't be enabled together on the same QP.
+*/
+enum mlx5_rx_hash_fields {
+	MLX5_RX_HASH_SRC_IPV4	= 1 << 0,
+	MLX5_RX_HASH_DST_IPV4	= 1 << 1,
+	MLX5_RX_HASH_SRC_IPV6	= 1 << 2,
+	MLX5_RX_HASH_DST_IPV6	= 1 << 3,
+	MLX5_RX_HASH_SRC_PORT_TCP	= 1 << 4,
+	MLX5_RX_HASH_DST_PORT_TCP	= 1 << 5,
+	MLX5_RX_HASH_SRC_PORT_UDP	= 1 << 6,
+	MLX5_RX_HASH_DST_PORT_UDP	= 1 << 7
+};
+
+struct mlx5_ib_create_qp_rss {
+	__u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+	__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
+	__u8 rx_key_len; /* valid only for Toeplitz */
+	__u8 reserved[6];
+	__u8 rx_hash_key[128]; /* valid only for Toeplitz */
+	__u32   comp_mask;
+	__u32   reserved1;
+};
+
+struct mlx5_ib_create_qp_resp {
+	__u32	uuar_index;
+};
+
+struct mlx5_ib_alloc_mw {
+	__u32	comp_mask;
+	__u8	num_klms;
+	__u8	reserved1;
+	__u16	reserved2;
+};
+
+struct mlx5_ib_create_wq {
+	__u64   buf_addr;
+	__u64   db_addr;
+	__u32   rq_wqe_count;
+	__u32   rq_wqe_shift;
+	__u32   user_index;
+	__u32   flags;
+	__u32   comp_mask;
+	__u32   reserved;
+};
+
+struct mlx5_ib_create_wq_resp {
+	__u32	response_length;
+	__u32	reserved;
+};
+
+struct mlx5_ib_create_rwq_ind_tbl_resp {
+	__u32	response_length;
+	__u32	reserved;
+};
+
+struct mlx5_ib_modify_wq {
+	__u32	comp_mask;
+	__u32	reserved;
+};
+#endif /* MLX5_ABI_USER_H */
-- 
cgit v1.2.3


From a85fb3383340b417132e5731f9694840660887cb Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:12 +0300
Subject: IB/cxgb3: Move user vendor structures

This patch moves cxgb3 vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libcxgb3) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                                 |  1 +
 drivers/infiniband/hw/cxgb3/iwch.c          |  2 +-
 drivers/infiniband/hw/cxgb3/iwch_provider.c |  2 +-
 drivers/infiniband/hw/cxgb3/iwch_user.h     | 74 ----------------------------
 include/uapi/rdma/Kbuild                    |  1 +
 include/uapi/rdma/cxgb3-abi.h               | 76 +++++++++++++++++++++++++++++
 6 files changed, 80 insertions(+), 76 deletions(-)
 delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_user.h
 create mode 100644 include/uapi/rdma/cxgb3-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index e76efbc414cd..8ebd644becef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3460,6 +3460,7 @@ L:	linux-rdma@vger.kernel.org
 W:	http://www.openfabrics.org
 S:	Supported
 F:	drivers/infiniband/hw/cxgb3/
+F:	include/uapi/rdma/cxgb3-abi.h
 
 CXGB4 ETHERNET DRIVER (CXGB4)
 M:	Hariprasad S <hariprasad@chelsio.com>
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 8e77dc543dd1..b3e11329801d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -36,7 +36,7 @@
 
 #include "cxgb3_offload.h"
 #include "iwch_provider.h"
-#include "iwch_user.h"
+#include <rdma/cxgb3-abi.h>
 #include "iwch.h"
 #include "iwch_cm.h"
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b47be87d5a53..cba57bb53dba 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -58,7 +58,7 @@
 #include "iwch.h"
 #include "iwch_provider.h"
 #include "iwch_cm.h"
-#include "iwch_user.h"
+#include <rdma/cxgb3-abi.h>
 #include "common.h"
 
 static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
deleted file mode 100644
index a277c31fcaf7..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_USER_H__
-#define __IWCH_USER_H__
-
-#define IWCH_UVERBS_ABI_VERSION	1
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-struct iwch_create_cq_req {
-	__u64 user_rptr_addr;
-};
-
-struct iwch_create_cq_resp_v0 {
-	__u64 key;
-	__u32 cqid;
-	__u32 size_log2;
-};
-
-struct iwch_create_cq_resp {
-	__u64 key;
-	__u32 cqid;
-	__u32 size_log2;
-	__u32 memsize;
-	__u32 reserved;
-};
-
-struct iwch_create_qp_resp {
-	__u64 key;
-	__u64 db_key;
-	__u32 qpid;
-	__u32 size_log2;
-	__u32 sq_size_log2;
-	__u32 rq_size_log2;
-};
-
-struct iwch_reg_user_mr_resp {
-	__u32 pbl_addr;
-};
-#endif
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index e9b0ca24f29d..681d7189a01a 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -7,4 +7,5 @@ header-y += rdma_netlink.h
 header-y += rdma_user_cm.h
 header-y += hfi/
 header-y += rdma_user_rxe.h
+header-y += cxgb3-abi.h
 header-y += mlx5-abi.h
diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h
new file mode 100644
index 000000000000..48a19bda071b
--- /dev/null
+++ b/include/uapi/rdma/cxgb3-abi.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef CXGB3_ABI_USER_H
+#define CXBG3_ABI_USER_H
+
+#include <linux/types.h>
+
+#define IWCH_UVERBS_ABI_VERSION	1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct iwch_create_cq_req {
+	__u64 user_rptr_addr;
+};
+
+struct iwch_create_cq_resp_v0 {
+	__u64 key;
+	__u32 cqid;
+	__u32 size_log2;
+};
+
+struct iwch_create_cq_resp {
+	__u64 key;
+	__u32 cqid;
+	__u32 size_log2;
+	__u32 memsize;
+	__u32 reserved;
+};
+
+struct iwch_create_qp_resp {
+	__u64 key;
+	__u64 db_key;
+	__u32 qpid;
+	__u32 size_log2;
+	__u32 sq_size_log2;
+	__u32 rq_size_log2;
+};
+
+struct iwch_reg_user_mr_resp {
+	__u32 pbl_addr;
+};
+#endif /* CXGB3_ABI_USER_H */
-- 
cgit v1.2.3


From e44ee2fd9845a86b0c7e8742672ae5ba6d7d34ee Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:13 +0300
Subject: IB/cxgb4: Move user vendor structures

This patch moves cxgb4 vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libcxgb4) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                            |  1 +
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 +-
 drivers/infiniband/hw/cxgb4/user.h     | 80 ---------------------------------
 include/uapi/rdma/Kbuild               |  1 +
 include/uapi/rdma/cxgb4-abi.h          | 81 ++++++++++++++++++++++++++++++++++
 5 files changed, 84 insertions(+), 81 deletions(-)
 delete mode 100644 drivers/infiniband/hw/cxgb4/user.h
 create mode 100644 include/uapi/rdma/cxgb4-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 8ebd644becef..22fd3b7f5a31 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3482,6 +3482,7 @@ L:	linux-rdma@vger.kernel.org
 W:	http://www.openfabrics.org
 S:	Supported
 F:	drivers/infiniband/hw/cxgb4/
+F:	include/uapi/rdma/cxgb4-abi.h
 
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
 M:	Casey Leedom <leedom@chelsio.com>
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index aa47e0ae80bc..f83604b2f82d 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -58,7 +58,7 @@
 #include "cxgb4.h"
 #include "cxgb4_uld.h"
 #include "l2t.h"
-#include "user.h"
+#include <rdma/cxgb4-abi.h>
 
 #define DRV_NAME "iw_cxgb4"
 #define MOD DRV_NAME ":"
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
deleted file mode 100644
index 295f422b9a3a..000000000000
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __C4IW_USER_H__
-#define __C4IW_USER_H__
-
-#define C4IW_UVERBS_ABI_VERSION	3
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-struct c4iw_create_cq_resp {
-	__u64 key;
-	__u64 gts_key;
-	__u64 memsize;
-	__u32 cqid;
-	__u32 size;
-	__u32 qid_mask;
-	__u32 reserved; /* explicit padding (optional for i386) */
-};
-
-
-enum {
-	C4IW_QPF_ONCHIP = (1<<0)
-};
-
-struct c4iw_create_qp_resp {
-	__u64 ma_sync_key;
-	__u64 sq_key;
-	__u64 rq_key;
-	__u64 sq_db_gts_key;
-	__u64 rq_db_gts_key;
-	__u64 sq_memsize;
-	__u64 rq_memsize;
-	__u32 sqid;
-	__u32 rqid;
-	__u32 sq_size;
-	__u32 rq_size;
-	__u32 qid_mask;
-	__u32 flags;
-};
-
-struct c4iw_alloc_ucontext_resp {
-	__u64 status_page_key;
-	__u32 status_page_size;
-	__u32 reserved; /* explicit padding (optional for i386) */
-};
-#endif
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 681d7189a01a..59b2c9b34423 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -8,4 +8,5 @@ header-y += rdma_user_cm.h
 header-y += hfi/
 header-y += rdma_user_rxe.h
 header-y += cxgb3-abi.h
+header-y += cxgb4-abi.h
 header-y += mlx5-abi.h
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
new file mode 100644
index 000000000000..472b15990894
--- /dev/null
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef CXGB4_ABI_USER_H
+#define CXGB4_ABI_USER_H
+
+#include <linux/types.h>
+
+#define C4IW_UVERBS_ABI_VERSION	3
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct c4iw_create_cq_resp {
+	__u64 key;
+	__u64 gts_key;
+	__u64 memsize;
+	__u32 cqid;
+	__u32 size;
+	__u32 qid_mask;
+	__u32 reserved; /* explicit padding (optional for i386) */
+};
+
+enum {
+	C4IW_QPF_ONCHIP = (1 << 0)
+};
+
+struct c4iw_create_qp_resp {
+	__u64 ma_sync_key;
+	__u64 sq_key;
+	__u64 rq_key;
+	__u64 sq_db_gts_key;
+	__u64 rq_db_gts_key;
+	__u64 sq_memsize;
+	__u64 rq_memsize;
+	__u32 sqid;
+	__u32 rqid;
+	__u32 sq_size;
+	__u32 rq_size;
+	__u32 qid_mask;
+	__u32 flags;
+};
+
+struct c4iw_alloc_ucontext_resp {
+	__u64 status_page_key;
+	__u32 status_page_size;
+	__u32 reserved; /* explicit padding (optional for i386) */
+};
+#endif /* CXGB4_ABI_USER_H */
-- 
cgit v1.2.3


From 9ce28a20eec551cfc032c87cd31a05c14d11155d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:14 +0300
Subject: IB/mlx4: Move user vendor structures

This patch moves mlx4 vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libmlx4) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                       |   1 +
 drivers/infiniband/hw/mlx4/cq.c   |   2 +-
 drivers/infiniband/hw/mlx4/main.c |   2 +-
 drivers/infiniband/hw/mlx4/qp.c   |   2 +-
 drivers/infiniband/hw/mlx4/srq.c  |   2 +-
 drivers/infiniband/hw/mlx4/user.h | 107 --------------------------------------
 include/uapi/rdma/Kbuild          |   1 +
 include/uapi/rdma/mlx4-abi.h      | 107 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 113 insertions(+), 111 deletions(-)
 delete mode 100644 drivers/infiniband/hw/mlx4/user.h
 create mode 100644 include/uapi/rdma/mlx4-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 22fd3b7f5a31..0af558f501cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7801,6 +7801,7 @@ Q:	http://patchwork.ozlabs.org/project/netdev/list/
 S:	Supported
 F:	drivers/net/ethernet/mellanox/mlx4/
 F:	include/linux/mlx4/
+F:	include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX4 IB driver
 M:	Yishai Hadas <yishaih@mellanox.com>
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d6fc8a6e8c33..2f0b4eed7eae 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -37,7 +37,7 @@
 #include <linux/slab.h>
 
 #include "mlx4_ib.h"
-#include "user.h"
+#include <rdma/mlx4-abi.h>
 
 static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
 {
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3ae64cefc39e..3c6d6103d18b 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -55,7 +55,7 @@
 #include <linux/mlx4/qp.h>
 
 #include "mlx4_ib.h"
-#include "user.h"
+#include <rdma/mlx4-abi.h>
 
 #define DRV_NAME	MLX4_IB_DRV_NAME
 #define DRV_VERSION	"2.2-1"
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 768085f59566..44c8a0da1507 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -47,7 +47,7 @@
 #include <linux/mlx4/qp.h>
 
 #include "mlx4_ib.h"
-#include "user.h"
+#include <rdma/mlx4-abi.h>
 
 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
 			     struct mlx4_ib_cq *recv_cq);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 0597f3eef5d0..7dd3f267f06b 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -37,7 +37,7 @@
 #include <linux/vmalloc.h>
 
 #include "mlx4_ib.h"
-#include "user.h"
+#include <rdma/mlx4-abi.h>
 
 static void *get_wqe(struct mlx4_ib_srq *srq, int n)
 {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
deleted file mode 100644
index 07e6769ef43b..000000000000
--- a/drivers/infiniband/hw/mlx4/user.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX4_IB_USER_H
-#define MLX4_IB_USER_H
-
-#include <linux/types.h>
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-
-#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
-#define MLX4_IB_UVERBS_ABI_VERSION		4
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct mlx4_ib_alloc_ucontext_resp_v3 {
-	__u32	qp_tab_size;
-	__u16	bf_reg_size;
-	__u16	bf_regs_per_page;
-};
-
-struct mlx4_ib_alloc_ucontext_resp {
-	__u32	dev_caps;
-	__u32	qp_tab_size;
-	__u16	bf_reg_size;
-	__u16	bf_regs_per_page;
-	__u32	cqe_size;
-};
-
-struct mlx4_ib_alloc_pd_resp {
-	__u32	pdn;
-	__u32	reserved;
-};
-
-struct mlx4_ib_create_cq {
-	__u64	buf_addr;
-	__u64	db_addr;
-};
-
-struct mlx4_ib_create_cq_resp {
-	__u32	cqn;
-	__u32	reserved;
-};
-
-struct mlx4_ib_resize_cq {
-	__u64	buf_addr;
-};
-
-struct mlx4_ib_create_srq {
-	__u64	buf_addr;
-	__u64	db_addr;
-};
-
-struct mlx4_ib_create_srq_resp {
-	__u32	srqn;
-	__u32	reserved;
-};
-
-struct mlx4_ib_create_qp {
-	__u64	buf_addr;
-	__u64	db_addr;
-	__u8	log_sq_bb_count;
-	__u8	log_sq_stride;
-	__u8	sq_no_prefetch;
-	__u8	reserved[5];
-};
-
-#endif /* MLX4_IB_USER_H */
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 59b2c9b34423..a1090ebc0b34 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -9,4 +9,5 @@ header-y += hfi/
 header-y += rdma_user_rxe.h
 header-y += cxgb3-abi.h
 header-y += cxgb4-abi.h
+header-y += mlx4-abi.h
 header-y += mlx5-abi.h
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
new file mode 100644
index 000000000000..af431752655c
--- /dev/null
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_ABI_USER_H
+#define MLX4_ABI_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
+struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+	__u32	cqe_size;
+};
+
+struct mlx4_ib_alloc_pd_resp {
+	__u32	pdn;
+	__u32	reserved;
+};
+
+struct mlx4_ib_create_cq {
+	__u64	buf_addr;
+	__u64	db_addr;
+};
+
+struct mlx4_ib_create_cq_resp {
+	__u32	cqn;
+	__u32	reserved;
+};
+
+struct mlx4_ib_resize_cq {
+	__u64	buf_addr;
+};
+
+struct mlx4_ib_create_srq {
+	__u64	buf_addr;
+	__u64	db_addr;
+};
+
+struct mlx4_ib_create_srq_resp {
+	__u32	srqn;
+	__u32	reserved;
+};
+
+struct mlx4_ib_create_qp {
+	__u64	buf_addr;
+	__u64	db_addr;
+	__u8	log_sq_bb_count;
+	__u8	log_sq_stride;
+	__u8	sq_no_prefetch;
+	__u8	reserved[5];
+};
+
+#endif /* MLX4_ABI_USER_H */
-- 
cgit v1.2.3


From a7fe7380f6b234f207a599c9ffbaae6c1a574634 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:15 +0300
Subject: IB/ocrdma: Move user vendor structures

This patch moves ocrdma vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libmlx4) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

In addition, it changes types to be __uXX instead of uXX.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-By: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                                 |   1 +
 drivers/infiniband/hw/ocrdma/ocrdma_abi.h   | 149 ---------------------------
 drivers/infiniband/hw/ocrdma/ocrdma_main.c  |   2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |   2 +-
 include/uapi/rdma/Kbuild                    |   1 +
 include/uapi/rdma/ocrdma-abi.h              | 151 ++++++++++++++++++++++++++++
 6 files changed, 155 insertions(+), 151 deletions(-)
 delete mode 100644 drivers/infiniband/hw/ocrdma/ocrdma_abi.h
 create mode 100644 include/uapi/rdma/ocrdma-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 0af558f501cf..940c0ff03f3f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10601,6 +10601,7 @@ L:	linux-rdma@vger.kernel.org
 W:	http://www.emulex.com
 S:	Supported
 F:	drivers/infiniband/hw/ocrdma/
+F:	include/uapi/rdma/ocrdma-abi.h
 
 SFC NETWORK DRIVER
 M:	Solarflare linux maintainers <linux-net-drivers@solarflare.com>
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
deleted file mode 100644
index 430b1350fe96..000000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* This file is part of the Emulex RoCE Device Driver for
- * RoCE (RDMA over Converged Ethernet) adapters.
- * Copyright (C) 2012-2015 Emulex. All rights reserved.
- * EMULEX and SLI are trademarks of Emulex.
- * www.emulex.com
- *
- * This software is available to you under a choice of one of two licenses.
- * You may choose to be licensed under the terms of the GNU General Public
- * License (GPL) Version 2, available from the file COPYING in the main
- * directory of this source tree, or the BSD license below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- *   this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Contact Information:
- * linux-drivers@emulex.com
- *
- * Emulex
- * 3333 Susan Street
- * Costa Mesa, CA 92626
- */
-
-#ifndef __OCRDMA_ABI_H__
-#define __OCRDMA_ABI_H__
-
-#define OCRDMA_ABI_VERSION 2
-#define OCRDMA_BE_ROCE_ABI_VERSION 1
-/* user kernel communication data structures. */
-
-struct ocrdma_alloc_ucontext_resp {
-	u32 dev_id;
-	u32 wqe_size;
-	u32 max_inline_data;
-	u32 dpp_wqe_size;
-	u64 ah_tbl_page;
-	u32 ah_tbl_len;
-	u32 rqe_size;
-	u8 fw_ver[32];
-	/* for future use/new features in progress */
-	u64 rsvd1;
-	u64 rsvd2;
-};
-
-struct ocrdma_alloc_pd_ureq {
-	u64 rsvd1;
-};
-
-struct ocrdma_alloc_pd_uresp {
-	u32 id;
-	u32 dpp_enabled;
-	u32 dpp_page_addr_hi;
-	u32 dpp_page_addr_lo;
-	u64 rsvd1;
-};
-
-struct ocrdma_create_cq_ureq {
-	u32 dpp_cq;
-	u32 rsvd; /* pad */
-};
-
-#define MAX_CQ_PAGES 8
-struct ocrdma_create_cq_uresp {
-	u32 cq_id;
-	u32 page_size;
-	u32 num_pages;
-	u32 max_hw_cqe;
-	u64 page_addr[MAX_CQ_PAGES];
-	u64 db_page_addr;
-	u32 db_page_size;
-	u32 phase_change;
-	/* for future use/new features in progress */
-	u64 rsvd1;
-	u64 rsvd2;
-};
-
-#define MAX_QP_PAGES 8
-#define MAX_UD_AV_PAGES 8
-
-struct ocrdma_create_qp_ureq {
-	u8 enable_dpp_cq;
-	u8 rsvd;
-	u16 dpp_cq_id;
-	u32 rsvd1;	/* pad */
-};
-
-struct ocrdma_create_qp_uresp {
-	u16 qp_id;
-	u16 sq_dbid;
-	u16 rq_dbid;
-	u16 resv0;	/* pad */
-	u32 sq_page_size;
-	u32 rq_page_size;
-	u32 num_sq_pages;
-	u32 num_rq_pages;
-	u64 sq_page_addr[MAX_QP_PAGES];
-	u64 rq_page_addr[MAX_QP_PAGES];
-	u64 db_page_addr;
-	u32 db_page_size;
-	u32 dpp_credit;
-	u32 dpp_offset;
-	u32 num_wqe_allocated;
-	u32 num_rqe_allocated;
-	u32 db_sq_offset;
-	u32 db_rq_offset;
-	u32 db_shift;
-	u64 rsvd[11];
-} __packed;
-
-struct ocrdma_create_srq_uresp {
-	u16 rq_dbid;
-	u16 resv0;	/* pad */
-	u32 resv1;
-
-	u32 rq_page_size;
-	u32 num_rq_pages;
-
-	u64 rq_page_addr[MAX_QP_PAGES];
-	u64 db_page_addr;
-
-	u32 db_page_size;
-	u32 num_rqe_allocated;
-	u32 db_rq_offset;
-	u32 db_shift;
-
-	u64 rsvd2;
-	u64 rsvd3;
-};
-
-#endif				/* __OCRDMA_ABI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 15e35acc690d..896071502739 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -56,7 +56,7 @@
 #include "be_roce.h"
 #include "ocrdma_hw.h"
 #include "ocrdma_stats.h"
-#include "ocrdma_abi.h"
+#include <rdma/ocrdma-abi.h>
 
 MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
 MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b1a3d91fe8b9..71d0534960d6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -51,7 +51,7 @@
 #include "ocrdma.h"
 #include "ocrdma_hw.h"
 #include "ocrdma_verbs.h"
-#include "ocrdma_abi.h"
+#include <rdma/ocrdma-abi.h>
 
 int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 {
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index a1090ebc0b34..492e144ebe90 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -11,3 +11,4 @@ header-y += cxgb3-abi.h
 header-y += cxgb4-abi.h
 header-y += mlx4-abi.h
 header-y += mlx5-abi.h
+header-y += ocrdma-abi.h
diff --git a/include/uapi/rdma/ocrdma-abi.h b/include/uapi/rdma/ocrdma-abi.h
new file mode 100644
index 000000000000..9f28191bef4d
--- /dev/null
+++ b/include/uapi/rdma/ocrdma-abi.h
@@ -0,0 +1,151 @@
+/* This file is part of the Emulex RoCE Device Driver for
+ * RoCE (RDMA over Converged Ethernet) adapters.
+ * Copyright (C) 2012-2015 Emulex. All rights reserved.
+ * EMULEX and SLI are trademarks of Emulex.
+ * www.emulex.com
+ *
+ * This software is available to you under a choice of one of two licenses.
+ * You may choose to be licensed under the terms of the GNU General Public
+ * License (GPL) Version 2, available from the file COPYING in the main
+ * directory of this source tree, or the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ */
+
+#ifndef OCRDMA_ABI_USER_H
+#define OCRDMA_ABI_USER_H
+
+#include <linux/types.h>
+
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
+/* user kernel communication data structures. */
+
+struct ocrdma_alloc_ucontext_resp {
+	__u32 dev_id;
+	__u32 wqe_size;
+	__u32 max_inline_data;
+	__u32 dpp_wqe_size;
+	__u64 ah_tbl_page;
+	__u32 ah_tbl_len;
+	__u32 rqe_size;
+	__u8 fw_ver[32];
+	/* for future use/new features in progress */
+	__u64 rsvd1;
+	__u64 rsvd2;
+};
+
+struct ocrdma_alloc_pd_ureq {
+	__u64 rsvd1;
+};
+
+struct ocrdma_alloc_pd_uresp {
+	__u32 id;
+	__u32 dpp_enabled;
+	__u32 dpp_page_addr_hi;
+	__u32 dpp_page_addr_lo;
+	__u64 rsvd1;
+};
+
+struct ocrdma_create_cq_ureq {
+	__u32 dpp_cq;
+	__u32 rsvd; /* pad */
+};
+
+#define MAX_CQ_PAGES 8
+struct ocrdma_create_cq_uresp {
+	__u32 cq_id;
+	__u32 page_size;
+	__u32 num_pages;
+	__u32 max_hw_cqe;
+	__u64 page_addr[MAX_CQ_PAGES];
+	__u64 db_page_addr;
+	__u32 db_page_size;
+	__u32 phase_change;
+	/* for future use/new features in progress */
+	__u64 rsvd1;
+	__u64 rsvd2;
+};
+
+#define MAX_QP_PAGES 8
+#define MAX_UD_AV_PAGES 8
+
+struct ocrdma_create_qp_ureq {
+	__u8 enable_dpp_cq;
+	__u8 rsvd;
+	__u16 dpp_cq_id;
+	__u32 rsvd1;	/* pad */
+};
+
+struct ocrdma_create_qp_uresp {
+	__u16 qp_id;
+	__u16 sq_dbid;
+	__u16 rq_dbid;
+	__u16 resv0;	/* pad */
+	__u32 sq_page_size;
+	__u32 rq_page_size;
+	__u32 num_sq_pages;
+	__u32 num_rq_pages;
+	__u64 sq_page_addr[MAX_QP_PAGES];
+	__u64 rq_page_addr[MAX_QP_PAGES];
+	__u64 db_page_addr;
+	__u32 db_page_size;
+	__u32 dpp_credit;
+	__u32 dpp_offset;
+	__u32 num_wqe_allocated;
+	__u32 num_rqe_allocated;
+	__u32 db_sq_offset;
+	__u32 db_rq_offset;
+	__u32 db_shift;
+	__u64 rsvd[11];
+} __packed;
+
+struct ocrdma_create_srq_uresp {
+	__u16 rq_dbid;
+	__u16 resv0;	/* pad */
+	__u32 resv1;
+
+	__u32 rq_page_size;
+	__u32 num_rq_pages;
+
+	__u64 rq_page_addr[MAX_QP_PAGES];
+	__u64 db_page_addr;
+
+	__u32 db_page_size;
+	__u32 num_rqe_allocated;
+	__u32 db_rq_offset;
+	__u32 db_shift;
+
+	__u64 rsvd2;
+	__u64 rsvd3;
+};
+
+#endif	/* OCRDMA_ABI_USER_H */
-- 
cgit v1.2.3


From c546b2a3b6612a9fa09940a13844ef384683db6e Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:16 +0300
Subject: IB/nes: Move user vendor structures

This patch moves nes vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libmlx4) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS                          |   1 +
 drivers/infiniband/hw/nes/nes.h      |   2 +-
 drivers/infiniband/hw/nes/nes_user.h | 114 -----------------------------------
 include/uapi/rdma/Kbuild             |   1 +
 include/uapi/rdma/nes-abi.h          | 114 +++++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 115 deletions(-)
 delete mode 100644 drivers/infiniband/hw/nes/nes_user.h
 create mode 100644 include/uapi/rdma/nes-abi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 940c0ff03f3f..cc77b9ab9d52 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8086,6 +8086,7 @@ L:	linux-rdma@vger.kernel.org
 W:	http://www.intel.com/Products/Server/Adapters/Server-Cluster/Server-Cluster-overview.htm
 S:	Supported
 F:	drivers/infiniband/hw/nes/
+F:	include/uapi/rdma/nes-abi.h
 
 NETEM NETWORK EMULATOR
 M:	Stephen Hemminger <stephen@networkplumber.org>
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bd9d132f11c7..e7430c9254d3 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -165,7 +165,7 @@ do { \
 #include "nes_hw.h"
 #include "nes_verbs.h"
 #include "nes_context.h"
-#include "nes_user.h"
+#include <rdma/nes-abi.h>
 #include "nes_cm.h"
 #include "nes_mgt.h"
 
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
deleted file mode 100644
index 529c421bb15c..000000000000
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef NES_USER_H
-#define NES_USER_H
-
-#include <linux/types.h>
-
-#define NES_ABI_USERSPACE_VER 2
-#define NES_ABI_KERNEL_VER    2
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct nes_alloc_ucontext_req {
-	__u32 reserved32;
-	__u8  userspace_ver;
-	__u8  reserved8[3];
-};
-
-struct nes_alloc_ucontext_resp {
-	__u32 max_pds; /* maximum pds allowed for this user process */
-	__u32 max_qps; /* maximum qps allowed for this user process */
-	__u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
-	__u8  virtwq;  /* flag to indicate if virtual WQ are to be used or not */
-	__u8  kernel_ver;
-	__u8  reserved[2];
-};
-
-struct nes_alloc_pd_resp {
-	__u32 pd_id;
-	__u32 mmap_db_index;
-};
-
-struct nes_create_cq_req {
-	__u64 user_cq_buffer;
-	__u32 mcrqf;
-	__u8 reserved[4];
-};
-
-struct nes_create_qp_req {
-	__u64 user_wqe_buffers;
-	__u64 user_qp_buffer;
-};
-
-enum iwnes_memreg_type {
-	IWNES_MEMREG_TYPE_MEM = 0x0000,
-	IWNES_MEMREG_TYPE_QP = 0x0001,
-	IWNES_MEMREG_TYPE_CQ = 0x0002,
-	IWNES_MEMREG_TYPE_MW = 0x0003,
-	IWNES_MEMREG_TYPE_FMR = 0x0004,
-	IWNES_MEMREG_TYPE_FMEM = 0x0005,
-};
-
-struct nes_mem_reg_req {
-	__u32 reg_type;	/* indicates if id is memory, QP or CQ */
-	__u32 reserved;
-};
-
-struct nes_create_cq_resp {
-	__u32 cq_id;
-	__u32 cq_size;
-	__u32 mmap_db_index;
-	__u32 reserved;
-};
-
-struct nes_create_qp_resp {
-	__u32 qp_id;
-	__u32 actual_sq_size;
-	__u32 actual_rq_size;
-	__u32 mmap_sq_db_index;
-	__u32 mmap_rq_db_index;
-	__u32 nes_drv_opt;
-};
-
-#endif				/* NES_USER_H */
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 492e144ebe90..ec32a8baf600 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -11,4 +11,5 @@ header-y += cxgb3-abi.h
 header-y += cxgb4-abi.h
 header-y += mlx4-abi.h
 header-y += mlx5-abi.h
+header-y += nes-abi.h
 header-y += ocrdma-abi.h
diff --git a/include/uapi/rdma/nes-abi.h b/include/uapi/rdma/nes-abi.h
new file mode 100644
index 000000000000..6eb3734394a2
--- /dev/null
+++ b/include/uapi/rdma/nes-abi.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2006 - 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_ABI_USER_H
+#define NES_ABI_USER_H
+
+#include <linux/types.h>
+
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER    2
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct nes_alloc_ucontext_req {
+	__u32 reserved32;
+	__u8  userspace_ver;
+	__u8  reserved8[3];
+};
+
+struct nes_alloc_ucontext_resp {
+	__u32 max_pds; /* maximum pds allowed for this user process */
+	__u32 max_qps; /* maximum qps allowed for this user process */
+	__u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
+	__u8  virtwq;  /* flag to indicate if virtual WQ are to be used or not */
+	__u8  kernel_ver;
+	__u8  reserved[2];
+};
+
+struct nes_alloc_pd_resp {
+	__u32 pd_id;
+	__u32 mmap_db_index;
+};
+
+struct nes_create_cq_req {
+	__u64 user_cq_buffer;
+	__u32 mcrqf;
+	__u8 reserved[4];
+};
+
+struct nes_create_qp_req {
+	__u64 user_wqe_buffers;
+	__u64 user_qp_buffer;
+};
+
+enum iwnes_memreg_type {
+	IWNES_MEMREG_TYPE_MEM = 0x0000,
+	IWNES_MEMREG_TYPE_QP = 0x0001,
+	IWNES_MEMREG_TYPE_CQ = 0x0002,
+	IWNES_MEMREG_TYPE_MW = 0x0003,
+	IWNES_MEMREG_TYPE_FMR = 0x0004,
+	IWNES_MEMREG_TYPE_FMEM = 0x0005,
+};
+
+struct nes_mem_reg_req {
+	__u32 reg_type;	/* indicates if id is memory, QP or CQ */
+	__u32 reserved;
+};
+
+struct nes_create_cq_resp {
+	__u32 cq_id;
+	__u32 cq_size;
+	__u32 mmap_db_index;
+	__u32 reserved;
+};
+
+struct nes_create_qp_resp {
+	__u32 qp_id;
+	__u32 actual_sq_size;
+	__u32 actual_rq_size;
+	__u32 mmap_sq_db_index;
+	__u32 mmap_rq_db_index;
+	__u32 nes_drv_opt;
+};
+
+#endif	/* NES_ABI_USER_H */
-- 
cgit v1.2.3


From 486f60954c71c5ce42341ed02cf804dda1c1bcc5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Thu, 22 Sep 2016 17:31:17 +0300
Subject: IB/mthca: Move user vendor structures

This patch moves mthca vendor's specific structures to
common UAPI folder which will be visible to all consumers.

These structures are used by user-space library driver
(libmthca) and currently manually copied to that library.

This move will allow cross-compile against these files and
simplify introduction of vendor specific data.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mthca/mthca_provider.c |   2 +-
 include/uapi/rdma/Kbuild                     |   1 +
 include/uapi/rdma/mthca-abi.h                | 111 +++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/rdma/mthca-abi.h

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index b697a0f77231..358930a41e36 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -46,7 +46,7 @@
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
-#include "mthca_user.h"
+#include <rdma/mthca-abi.h>
 #include "mthca_memfree.h"
 
 static void init_query_mad(struct ib_smp *mad)
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index ec32a8baf600..f14ab7ff5fee 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -11,5 +11,6 @@ header-y += cxgb3-abi.h
 header-y += cxgb4-abi.h
 header-y += mlx4-abi.h
 header-y += mlx5-abi.h
+header-y += mthca-abi.h
 header-y += nes-abi.h
 header-y += ocrdma-abi.h
diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h
new file mode 100644
index 000000000000..bcbf4ff2f6d1
--- /dev/null
+++ b/include/uapi/rdma/mthca-abi.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_ABI_USER_H
+#define MTHCA_ABI_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION	1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct mthca_alloc_ucontext_resp {
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct mthca_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;
+};
+
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC	0x1
+
+struct mthca_reg_mr {
+	__u32 mr_attrs;
+	__u32 reserved;
+};
+
+struct mthca_create_cq {
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;
+};
+
+struct mthca_resize_cq {
+	__u32 lkey;
+	__u32 reserved;
+};
+
+struct mthca_create_srq {
+	__u32 lkey;
+	__u32 db_index;
+	__u64 db_page;
+};
+
+struct mthca_create_srq_resp {
+	__u32 srqn;
+	__u32 reserved;
+};
+
+struct mthca_create_qp {
+	__u32 lkey;
+	__u32 reserved;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+#endif /* MTHCA_ABI_USER_H */
-- 
cgit v1.2.3


From f58b3c91f6786c66483fc18fd8b82a74cbf96d19 Mon Sep 17 00:00:00 2001
From: Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
Date: Tue, 11 Oct 2016 13:53:10 -0700
Subject: autofs: move inclusion of linux/limits.h to uapi

linux/limits.h should be included by uapi instead of linux/auto_fs.h
so as not to cause compile error in userspace.

 # cat << EOF > ./test1.c
 > #include <stdio.h>
 > #include <linux/auto_fs.h>
 > int main(void) {
 >     return 0;
 > }
 > EOF
 # gcc -Wall -g ./test1.c
 In file included from ./test1.c:2:0:
 /usr/include/linux/auto_fs.h:54:12: error: 'NAME_MAX' undeclared here (not in a function)
   char name[NAME_MAX+1];
             ^

Link: http://lkml.kernel.org/r/20160812024856.12352.24092.stgit@pluto.themaw.net
Signed-off-by: Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
Signed-off-by: Ian Kent <ikent@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/auto_fs.h      | 1 -
 include/uapi/linux/auto_fs.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h
index b4066bb89083..b8f814c95cf5 100644
--- a/include/linux/auto_fs.h
+++ b/include/linux/auto_fs.h
@@ -10,7 +10,6 @@
 #define _LINUX_AUTO_FS_H
 
 #include <linux/fs.h>
-#include <linux/limits.h>
 #include <linux/ioctl.h>
 #include <uapi/linux/auto_fs.h>
 #endif /* _LINUX_AUTO_FS_H */
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 9175a1b4dc69..1bfc3ed8b284 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -12,6 +12,7 @@
 #define _UAPI_LINUX_AUTO_FS_H
 
 #include <linux/types.h>
+#include <linux/limits.h>
 #ifndef __KERNEL__
 #include <sys/ioctl.h>
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 9b88ee0f3bb4c5b1e721bdcee93601b501d72f0a Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 11 Oct 2016 13:53:13 -0700
Subject: autofs4: move linux/auto_dev-ioctl.h to uapi/linux

Since linux/auto_dev-ioctl.h wasn't included in include/linux/Kbuild
it wasn't moved to uapi/linux as part of the uapi series.

Link: http://lkml.kernel.org/r/20160812024901.12352.10984.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Tomohiro Kusumi <kusumi.tomohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/auto_dev-ioctl.h      | 209 +---------------------------------
 include/uapi/linux/auto_dev-ioctl.h | 221 ++++++++++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+), 208 deletions(-)
 create mode 100644 include/uapi/linux/auto_dev-ioctl.h

(limited to 'include/uapi')

diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
index bf82e3a758e5..28c15050ebe6 100644
--- a/include/linux/auto_dev-ioctl.h
+++ b/include/linux/auto_dev-ioctl.h
@@ -10,212 +10,5 @@
 #ifndef _LINUX_AUTO_DEV_IOCTL_H
 #define _LINUX_AUTO_DEV_IOCTL_H
 
-#include <linux/auto_fs.h>
-#include <linux/string.h>
-
-#define AUTOFS_DEVICE_NAME		"autofs"
-
-#define AUTOFS_DEV_IOCTL_VERSION_MAJOR	1
-#define AUTOFS_DEV_IOCTL_VERSION_MINOR	0
-
-#define AUTOFS_DEV_IOCTL_SIZE		sizeof(struct autofs_dev_ioctl)
-
-/*
- * An ioctl interface for autofs mount point control.
- */
-
-struct args_protover {
-	__u32	version;
-};
-
-struct args_protosubver {
-	__u32	sub_version;
-};
-
-struct args_openmount {
-	__u32	devid;
-};
-
-struct args_ready {
-	__u32	token;
-};
-
-struct args_fail {
-	__u32	token;
-	__s32	status;
-};
-
-struct args_setpipefd {
-	__s32	pipefd;
-};
-
-struct args_timeout {
-	__u64	timeout;
-};
-
-struct args_requester {
-	__u32	uid;
-	__u32	gid;
-};
-
-struct args_expire {
-	__u32	how;
-};
-
-struct args_askumount {
-	__u32	may_umount;
-};
-
-struct args_ismountpoint {
-	union {
-		struct args_in {
-			__u32	type;
-		} in;
-		struct args_out {
-			__u32	devid;
-			__u32	magic;
-		} out;
-	};
-};
-
-/*
- * All the ioctls use this structure.
- * When sending a path size must account for the total length
- * of the chunk of memory otherwise is is the size of the
- * structure.
- */
-
-struct autofs_dev_ioctl {
-	__u32 ver_major;
-	__u32 ver_minor;
-	__u32 size;		/* total size of data passed in
-				 * including this struct */
-	__s32 ioctlfd;		/* automount command fd */
-
-	/* Command parameters */
-
-	union {
-		struct args_protover		protover;
-		struct args_protosubver		protosubver;
-		struct args_openmount		openmount;
-		struct args_ready		ready;
-		struct args_fail		fail;
-		struct args_setpipefd		setpipefd;
-		struct args_timeout		timeout;
-		struct args_requester		requester;
-		struct args_expire		expire;
-		struct args_askumount		askumount;
-		struct args_ismountpoint	ismountpoint;
-	};
-
-	char path[0];
-};
-
-static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
-{
-	memset(in, 0, sizeof(struct autofs_dev_ioctl));
-	in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
-	in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
-	in->size = sizeof(struct autofs_dev_ioctl);
-	in->ioctlfd = -1;
-}
-
-/*
- * If you change this make sure you make the corresponding change
- * to autofs-dev-ioctl.c:lookup_ioctl()
- */
-enum {
-	/* Get various version info */
-	AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
-	AUTOFS_DEV_IOCTL_PROTOVER_CMD,
-	AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
-
-	/* Open mount ioctl fd */
-	AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
-
-	/* Close mount ioctl fd */
-	AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
-
-	/* Mount/expire status returns */
-	AUTOFS_DEV_IOCTL_READY_CMD,
-	AUTOFS_DEV_IOCTL_FAIL_CMD,
-
-	/* Activate/deactivate autofs mount */
-	AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
-	AUTOFS_DEV_IOCTL_CATATONIC_CMD,
-
-	/* Expiry timeout */
-	AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
-
-	/* Get mount last requesting uid and gid */
-	AUTOFS_DEV_IOCTL_REQUESTER_CMD,
-
-	/* Check for eligible expire candidates */
-	AUTOFS_DEV_IOCTL_EXPIRE_CMD,
-
-	/* Request busy status */
-	AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
-
-	/* Check if path is a mountpoint */
-	AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
-};
-
-#define AUTOFS_IOCTL 0x93
-
-#define AUTOFS_DEV_IOCTL_VERSION \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_PROTOVER \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_OPENMOUNT \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_READY \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_FAIL \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_SETPIPEFD \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_CATATONIC \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_TIMEOUT \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_REQUESTER \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_EXPIRE \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
-
-#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
-	_IOWR(AUTOFS_IOCTL, \
-	      AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
-
+#include <uapi/linux/auto_dev-ioctl.h>
 #endif	/* _LINUX_AUTO_DEV_IOCTL_H */
diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h
new file mode 100644
index 000000000000..021ed331dd71
--- /dev/null
+++ b/include/uapi/linux/auto_dev-ioctl.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _UAPI_LINUX_AUTO_DEV_IOCTL_H
+#define _UAPI_LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+#include <linux/string.h>
+
+#define AUTOFS_DEVICE_NAME		"autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR	1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR	0
+
+#define AUTOFS_DEV_IOCTL_SIZE		sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+	__u32	version;
+};
+
+struct args_protosubver {
+	__u32	sub_version;
+};
+
+struct args_openmount {
+	__u32	devid;
+};
+
+struct args_ready {
+	__u32	token;
+};
+
+struct args_fail {
+	__u32	token;
+	__s32	status;
+};
+
+struct args_setpipefd {
+	__s32	pipefd;
+};
+
+struct args_timeout {
+	__u64	timeout;
+};
+
+struct args_requester {
+	__u32	uid;
+	__u32	gid;
+};
+
+struct args_expire {
+	__u32	how;
+};
+
+struct args_askumount {
+	__u32	may_umount;
+};
+
+struct args_ismountpoint {
+	union {
+		struct args_in {
+			__u32	type;
+		} in;
+		struct args_out {
+			__u32	devid;
+			__u32	magic;
+		} out;
+	};
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+	__u32 ver_major;
+	__u32 ver_minor;
+	__u32 size;		/* total size of data passed in
+				 * including this struct */
+	__s32 ioctlfd;		/* automount command fd */
+
+	/* Command parameters */
+
+	union {
+		struct args_protover		protover;
+		struct args_protosubver		protosubver;
+		struct args_openmount		openmount;
+		struct args_ready		ready;
+		struct args_fail		fail;
+		struct args_setpipefd		setpipefd;
+		struct args_timeout		timeout;
+		struct args_requester		requester;
+		struct args_expire		expire;
+		struct args_askumount		askumount;
+		struct args_ismountpoint	ismountpoint;
+	};
+
+	char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+	memset(in, 0, sizeof(struct autofs_dev_ioctl));
+	in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+	in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+	in->size = sizeof(struct autofs_dev_ioctl);
+	in->ioctlfd = -1;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+	/* Get various version info */
+	AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+	AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+	AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+	/* Open mount ioctl fd */
+	AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+	/* Close mount ioctl fd */
+	AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+	/* Mount/expire status returns */
+	AUTOFS_DEV_IOCTL_READY_CMD,
+	AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+	/* Activate/deactivate autofs mount */
+	AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+	AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+	/* Expiry timeout */
+	AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+	/* Get mount last requesting uid and gid */
+	AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+	/* Check for eligible expire candidates */
+	AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+	/* Request busy status */
+	AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+	/* Check if path is a mountpoint */
+	AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif	/* _UAPI_LINUX_AUTO_DEV_IOCTL_H */
-- 
cgit v1.2.3


From 2e0cbc4dd077aea4f1693583fd68eaed4d60464b Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Mon, 10 Oct 2016 13:15:30 +0300
Subject: qedr: Add RoCE driver framework

Adds a skeletal implementation of the qed* RoCE driver -
basically the ability to communicate with the qede driver and
receive notifications from it regarding various init/exit events.

Signed-off-by: Rajesh Borundia <rajesh.borundia@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/Kconfig          |   2 +
 drivers/infiniband/hw/Makefile      |   1 +
 drivers/infiniband/hw/qedr/Kconfig  |   7 +
 drivers/infiniband/hw/qedr/Makefile |   3 +
 drivers/infiniband/hw/qedr/main.c   | 254 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/qedr/qedr.h   |  61 +++++++++
 drivers/net/ethernet/qlogic/Kconfig |  11 --
 include/uapi/linux/pci_regs.h       |   3 +
 8 files changed, 331 insertions(+), 11 deletions(-)
 create mode 100644 drivers/infiniband/hw/qedr/Kconfig
 create mode 100644 drivers/infiniband/hw/qedr/Makefile
 create mode 100644 drivers/infiniband/hw/qedr/main.c
 create mode 100644 drivers/infiniband/hw/qedr/qedr.h

(limited to 'include/uapi')

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 19a418a1b631..fb3fb89640e5 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -89,4 +89,6 @@ source "drivers/infiniband/sw/rxe/Kconfig"
 
 source "drivers/infiniband/hw/hfi1/Kconfig"
 
+source "drivers/infiniband/hw/qedr/Kconfig"
+
 endif # INFINIBAND
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 21fe401ff178..e7a5ed9f6f3f 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_INFINIBAND_OCRDMA)		+= ocrdma/
 obj-$(CONFIG_INFINIBAND_USNIC)		+= usnic/
 obj-$(CONFIG_INFINIBAND_HFI1)		+= hfi1/
 obj-$(CONFIG_INFINIBAND_HNS)		+= hns/
+obj-$(CONFIG_INFINIBAND_QEDR)		+= qedr/
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
new file mode 100644
index 000000000000..7c06d85568d4
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_QEDR
+	tristate "QLogic RoCE driver"
+	depends on 64BIT && QEDE
+	select QED_LL2
+	---help---
+	  This driver provides low-level InfiniBand over Ethernet
+	  support for QLogic QED host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
new file mode 100644
index 000000000000..3a5b7a288281
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
+
+qedr-y := main.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
new file mode 100644
index 000000000000..7387d029a35b
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -0,0 +1,254 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_addr.h>
+#include <linux/netdevice.h>
+#include <linux/iommu.h>
+#include <net/addrconf.h>
+#include <linux/qed/qede_roce.h>
+#include "qedr.h"
+
+MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
+MODULE_AUTHOR("QLogic Corporation");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(QEDR_MODULE_VERSION);
+
+void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
+			    enum ib_event_type type)
+{
+	struct ib_event ibev;
+
+	ibev.device = &dev->ibdev;
+	ibev.element.port_num = port_num;
+	ibev.event = type;
+
+	ib_dispatch_event(&ibev);
+}
+
+static enum rdma_link_layer qedr_link_layer(struct ib_device *device,
+					    u8 port_num)
+{
+	return IB_LINK_LAYER_ETHERNET;
+}
+
+static int qedr_register_device(struct qedr_dev *dev)
+{
+	strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
+
+	memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
+	dev->ibdev.owner = THIS_MODULE;
+
+	dev->ibdev.get_link_layer = qedr_link_layer;
+
+	return 0;
+}
+
+/* QEDR sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct qedr_dev *dev = dev_get_drvdata(device);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
+}
+
+static ssize_t show_hca_type(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
+
+static struct device_attribute *qedr_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_hca_type
+};
+
+static void qedr_remove_sysfiles(struct qedr_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
+		device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
+}
+
+static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+	u32 val;
+
+	dev->atomic_cap = IB_ATOMIC_NONE;
+
+	bridge = pdev->bus->self;
+	if (!bridge)
+		return;
+
+	/* Check whether we are connected directly or via a switch */
+	while (bridge && bridge->bus->parent) {
+		DP_DEBUG(dev, QEDR_MSG_INIT,
+			 "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
+			 bridge->bus->number, bridge->bus->primary);
+		/* Need to check Atomic Op Routing Supported all the way to
+		 * root complex.
+		 */
+		pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+		if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
+			pcie_capability_clear_word(pdev,
+						   PCI_EXP_DEVCTL2,
+						   PCI_EXP_DEVCTL2_ATOMIC_REQ);
+			return;
+		}
+		bridge = bridge->bus->parent->self;
+	}
+	bridge = pdev->bus->self;
+
+	/* according to bridge capability */
+	pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+	if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
+		pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_ATOMIC_REQ);
+		dev->atomic_cap = IB_ATOMIC_GLOB;
+	} else {
+		pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+					   PCI_EXP_DEVCTL2_ATOMIC_REQ);
+	}
+}
+
+static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
+				 struct net_device *ndev)
+{
+	struct qedr_dev *dev;
+	int rc = 0, i;
+
+	dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
+	if (!dev) {
+		pr_err("Unable to allocate ib device\n");
+		return NULL;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr add device called\n");
+
+	dev->pdev = pdev;
+	dev->ndev = ndev;
+	dev->cdev = cdev;
+
+	qedr_pci_set_atomic(dev, pdev);
+
+	rc = qedr_register_device(dev);
+	if (rc) {
+		DP_ERR(dev, "Unable to allocate register device\n");
+		goto init_err;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
+		if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
+			goto init_err;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
+	return dev;
+
+init_err:
+	ib_dealloc_device(&dev->ibdev);
+	DP_ERR(dev, "qedr driver load failed rc=%d\n", rc);
+
+	return NULL;
+}
+
+static void qedr_remove(struct qedr_dev *dev)
+{
+	/* First unregister with stack to stop all the active traffic
+	 * of the registered clients.
+	 */
+	qedr_remove_sysfiles(dev);
+
+	ib_dealloc_device(&dev->ibdev);
+}
+
+static int qedr_close(struct qedr_dev *dev)
+{
+	qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+
+	return 0;
+}
+
+static void qedr_shutdown(struct qedr_dev *dev)
+{
+	qedr_close(dev);
+	qedr_remove(dev);
+}
+
+/* event handling via NIC driver ensures that all the NIC specific
+ * initialization done before RoCE driver notifies
+ * event to stack.
+ */
+static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+{
+	switch (event) {
+	case QEDE_UP:
+		qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+		break;
+	case QEDE_DOWN:
+		qedr_close(dev);
+		break;
+	case QEDE_CLOSE:
+		qedr_shutdown(dev);
+		break;
+	case QEDE_CHANGE_ADDR:
+		qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
+		break;
+	default:
+		pr_err("Event not supported\n");
+	}
+}
+
+static struct qedr_driver qedr_drv = {
+	.name = "qedr_driver",
+	.add = qedr_add,
+	.remove = qedr_remove,
+	.notify = qedr_notify,
+};
+
+static int __init qedr_init_module(void)
+{
+	return qede_roce_register_driver(&qedr_drv);
+}
+
+static void __exit qedr_exit_module(void)
+{
+	qede_roce_unregister_driver(&qedr_drv);
+}
+
+module_init(qedr_init_module);
+module_exit(qedr_exit_module);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
new file mode 100644
index 000000000000..801417080f11
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -0,0 +1,61 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_H__
+#define __QEDR_H__
+
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qede_roce.h>
+
+#define QEDR_MODULE_VERSION	"8.10.10.0"
+#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
+#define DP_NAME(dev) ((dev)->ibdev.name)
+
+#define DP_DEBUG(dev, module, fmt, ...)					\
+	pr_debug("(%s) " module ": " fmt,				\
+		 DP_NAME(dev) ? DP_NAME(dev) : "", ## __VA_ARGS__)
+
+#define QEDR_MSG_INIT "INIT"
+
+struct qedr_dev {
+	struct ib_device	ibdev;
+	struct qed_dev		*cdev;
+	struct pci_dev		*pdev;
+	struct net_device	*ndev;
+
+	enum ib_atomic_cap	atomic_cap;
+
+	u32			dp_module;
+	u8			dp_level;
+};
+#endif
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 0df1391f9663..1e8339a67f6e 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -107,15 +107,4 @@ config QEDE
 	---help---
 	  This enables the support for ...
 
-config INFINIBAND_QEDR
-	tristate "QLogic qede RoCE sources [debug]"
-	depends on QEDE && 64BIT
-	select QED_LL2
-	default n
-	---help---
-	  This provides a temporary node that allows the compilation
-	  and logical testing of the InfiniBand over Ethernet support
-	  for QLogic QED. This would be replaced by the 'real' option
-	  once the QEDR driver is added [+relocated].
-
 endif # NET_VENDOR_QLOGIC
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index d812172d1d7b..e5a2e68b2236 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -612,6 +612,8 @@
  */
 #define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI		0x00000020 /* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* Atomic 64-bit compare */
 #define  PCI_EXP_DEVCAP2_LTR		0x00000800 /* Latency tolerance reporting */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
@@ -619,6 +621,7 @@
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_ARI		0x0020	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ	0x0040	/* Set Atomic requests */
 #define  PCI_EXP_DEVCTL2_IDO_REQ_EN	0x0100	/* Allow IDO for requests */
 #define  PCI_EXP_DEVCTL2_IDO_CMP_EN	0x0200	/* Allow IDO for completions */
 #define  PCI_EXP_DEVCTL2_LTR_EN		0x0400	/* Enable LTR mechanism */
-- 
cgit v1.2.3


From ac1b36e55a5137e2f146e60be36d0cc81069feb6 Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Mon, 10 Oct 2016 13:15:32 +0300
Subject: qedr: Add support for user context verbs

Add support for ucontext, query port, add and del gid verbs.

Signed-off-by: Rajesh Borundia <rajesh.borundia@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qedr/Makefile |   2 +-
 drivers/infiniband/hw/qedr/main.c   |  26 ++
 drivers/infiniband/hw/qedr/qedr.h   |  34 +++
 drivers/infiniband/hw/qedr/verbs.c  | 456 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/qedr/verbs.h  |  52 ++++
 include/uapi/rdma/qedr-abi.h        |  53 +++++
 6 files changed, 622 insertions(+), 1 deletion(-)
 create mode 100644 drivers/infiniband/hw/qedr/verbs.c
 create mode 100644 drivers/infiniband/hw/qedr/verbs.h
 create mode 100644 include/uapi/rdma/qedr-abi.h

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
index 3a5b7a288281..b10f2b17ada5 100644
--- a/drivers/infiniband/hw/qedr/Makefile
+++ b/drivers/infiniband/hw/qedr/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
 
-qedr-y := main.o
+qedr-y := main.o verbs.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 67805941d138..55c4f565c214 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
 #include <linux/netdevice.h>
 #include <linux/iommu.h>
 #include <net/addrconf.h>
@@ -39,6 +40,8 @@
 #include <linux/qed/qed_chain.h>
 #include <linux/qed/qed_if.h>
 #include "qedr.h"
+#include "verbs.h"
+#include <rdma/qedr-abi.h>
 
 MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
 MODULE_AUTHOR("QLogic Corporation");
@@ -80,6 +83,29 @@ static int qedr_register_device(struct qedr_dev *dev)
 
 	memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
 	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION;
+
+	dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
+				     QEDR_UVERBS(QUERY_DEVICE) |
+				     QEDR_UVERBS(QUERY_PORT);
+
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.num_comp_vectors = dev->num_cnq;
+	dev->ibdev.node_type = RDMA_NODE_IB_CA;
+
+	dev->ibdev.query_device = qedr_query_device;
+	dev->ibdev.query_port = qedr_query_port;
+	dev->ibdev.modify_port = qedr_modify_port;
+
+	dev->ibdev.query_gid = qedr_query_gid;
+	dev->ibdev.add_gid = qedr_add_gid;
+	dev->ibdev.del_gid = qedr_del_gid;
+
+	dev->ibdev.alloc_ucontext = qedr_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
+	dev->ibdev.mmap = qedr_mmap;
+
+	dev->ibdev.dma_device = &dev->pdev->dev;
 
 	dev->ibdev.get_link_layer = qedr_link_layer;
 	dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 1adbebb887eb..2091c0d6e67d 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -49,6 +49,7 @@
 		 DP_NAME(dev) ? DP_NAME(dev) : "", ## __VA_ARGS__)
 
 #define QEDR_MSG_INIT "INIT"
+#define QEDR_MSG_MISC "MISC"
 
 struct qedr_dev;
 
@@ -176,6 +177,39 @@ struct qedr_dev {
 
 #define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
 
+#define QEDR_ROCE_PKEY_MAX 1
+#define QEDR_ROCE_PKEY_TABLE_LEN 1
+#define QEDR_ROCE_PKEY_DEFAULT 0xffff
+
+struct qedr_ucontext {
+	struct ib_ucontext ibucontext;
+	struct qedr_dev *dev;
+	struct qedr_pd *pd;
+	u64 dpi_addr;
+	u64 dpi_phys_addr;
+	u32 dpi_size;
+	u16 dpi;
+
+	struct list_head mm_head;
+
+	/* Lock to protect mm list */
+	struct mutex mm_list_lock;
+};
+
+struct qedr_mm {
+	struct {
+		u64 phy_addr;
+		unsigned long len;
+	} key;
+	struct list_head entry;
+};
+
+static inline
+struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct qedr_ucontext, ibucontext);
+}
+
 static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev)
 {
 	return container_of(ibdev, struct qedr_dev, ibdev);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
new file mode 100644
index 000000000000..b9dcade1cb9f
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -0,0 +1,456 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/crc32.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <linux/iommu.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include "qedr_hsi.h"
+#include <linux/qed/qed_if.h>
+#include "qedr.h"
+#include "verbs.h"
+#include <rdma/qedr-abi.h>
+
+int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
+		   union ib_gid *sgid)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	int rc = 0;
+
+	if (!rdma_cap_roce_gid_table(ibdev, port))
+		return -ENODEV;
+
+	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
+	if (rc == -EAGAIN) {
+		memcpy(sgid, &zgid, sizeof(*sgid));
+		return 0;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
+		 sgid->global.interface_id, sgid->global.subnet_prefix);
+
+	return rc;
+}
+
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+		 unsigned int index, const union ib_gid *gid,
+		 const struct ib_gid_attr *attr, void **context)
+{
+	if (!rdma_cap_roce_gid_table(device, port_num))
+		return -EINVAL;
+
+	if (port_num > QEDR_MAX_PORT)
+		return -EINVAL;
+
+	if (!context)
+		return -EINVAL;
+
+	return 0;
+}
+
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+		 unsigned int index, void **context)
+{
+	if (!rdma_cap_roce_gid_table(device, port_num))
+		return -EINVAL;
+
+	if (port_num > QEDR_MAX_PORT)
+		return -EINVAL;
+
+	if (!context)
+		return -EINVAL;
+
+	return 0;
+}
+
+int qedr_query_device(struct ib_device *ibdev,
+		      struct ib_device_attr *attr, struct ib_udata *udata)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	struct qedr_device_attr *qattr = &dev->attr;
+
+	if (!dev->rdma_ctx) {
+		DP_ERR(dev,
+		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
+		       dev->rdma_ctx);
+		return -EINVAL;
+	}
+
+	memset(attr, 0, sizeof(*attr));
+
+	attr->fw_ver = qattr->fw_ver;
+	attr->sys_image_guid = qattr->sys_image_guid;
+	attr->max_mr_size = qattr->max_mr_size;
+	attr->page_size_cap = qattr->page_size_caps;
+	attr->vendor_id = qattr->vendor_id;
+	attr->vendor_part_id = qattr->vendor_part_id;
+	attr->hw_ver = qattr->hw_ver;
+	attr->max_qp = qattr->max_qp;
+	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
+	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
+	    IB_DEVICE_RC_RNR_NAK_GEN |
+	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+	attr->max_sge = qattr->max_sge;
+	attr->max_sge_rd = qattr->max_sge;
+	attr->max_cq = qattr->max_cq;
+	attr->max_cqe = qattr->max_cqe;
+	attr->max_mr = qattr->max_mr;
+	attr->max_mw = qattr->max_mw;
+	attr->max_pd = qattr->max_pd;
+	attr->atomic_cap = dev->atomic_cap;
+	attr->max_fmr = qattr->max_fmr;
+	attr->max_map_per_fmr = 16;
+	attr->max_qp_init_rd_atom =
+	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
+	attr->max_qp_rd_atom =
+	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
+		attr->max_qp_init_rd_atom);
+
+	attr->max_srq = qattr->max_srq;
+	attr->max_srq_sge = qattr->max_srq_sge;
+	attr->max_srq_wr = qattr->max_srq_wr;
+
+	attr->local_ca_ack_delay = qattr->dev_ack_delay;
+	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
+	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+	attr->max_ah = qattr->max_ah;
+
+	return 0;
+}
+
+#define QEDR_SPEED_SDR		(1)
+#define QEDR_SPEED_DDR		(2)
+#define QEDR_SPEED_QDR		(4)
+#define QEDR_SPEED_FDR10	(8)
+#define QEDR_SPEED_FDR		(16)
+#define QEDR_SPEED_EDR		(32)
+
+static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+					    u8 *ib_width)
+{
+	switch (speed) {
+	case 1000:
+		*ib_speed = QEDR_SPEED_SDR;
+		*ib_width = IB_WIDTH_1X;
+		break;
+	case 10000:
+		*ib_speed = QEDR_SPEED_QDR;
+		*ib_width = IB_WIDTH_1X;
+		break;
+
+	case 20000:
+		*ib_speed = QEDR_SPEED_DDR;
+		*ib_width = IB_WIDTH_4X;
+		break;
+
+	case 25000:
+		*ib_speed = QEDR_SPEED_EDR;
+		*ib_width = IB_WIDTH_1X;
+		break;
+
+	case 40000:
+		*ib_speed = QEDR_SPEED_QDR;
+		*ib_width = IB_WIDTH_4X;
+		break;
+
+	case 50000:
+		*ib_speed = QEDR_SPEED_QDR;
+		*ib_width = IB_WIDTH_4X;
+		break;
+
+	case 100000:
+		*ib_speed = QEDR_SPEED_EDR;
+		*ib_width = IB_WIDTH_4X;
+		break;
+
+	default:
+		/* Unsupported */
+		*ib_speed = QEDR_SPEED_SDR;
+		*ib_width = IB_WIDTH_1X;
+	}
+}
+
+int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
+{
+	struct qedr_dev *dev;
+	struct qed_rdma_port *rdma_port;
+
+	dev = get_qedr_dev(ibdev);
+	if (port > 1) {
+		DP_ERR(dev, "invalid_port=0x%x\n", port);
+		return -EINVAL;
+	}
+
+	if (!dev->rdma_ctx) {
+		DP_ERR(dev, "rdma_ctx is NULL\n");
+		return -EINVAL;
+	}
+
+	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
+	memset(attr, 0, sizeof(*attr));
+
+	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
+		attr->state = IB_PORT_ACTIVE;
+		attr->phys_state = 5;
+	} else {
+		attr->state = IB_PORT_DOWN;
+		attr->phys_state = 3;
+	}
+	attr->max_mtu = IB_MTU_4096;
+	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
+	attr->lid = 0;
+	attr->lmc = 0;
+	attr->sm_lid = 0;
+	attr->sm_sl = 0;
+	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+	attr->gid_tbl_len = QEDR_MAX_SGID;
+	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
+	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
+	attr->qkey_viol_cntr = 0;
+	get_link_speed_and_width(rdma_port->link_speed,
+				 &attr->active_speed, &attr->active_width);
+	attr->max_msg_sz = rdma_port->max_msg_size;
+	attr->max_vl_num = 4;
+
+	return 0;
+}
+
+int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
+		     struct ib_port_modify *props)
+{
+	struct qedr_dev *dev;
+
+	dev = get_qedr_dev(ibdev);
+	if (port > 1) {
+		DP_ERR(dev, "invalid_port=0x%x\n", port);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+			 unsigned long len)
+{
+	struct qedr_mm *mm;
+
+	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+	if (!mm)
+		return -ENOMEM;
+
+	mm->key.phy_addr = phy_addr;
+	/* This function might be called with a length which is not a multiple
+	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
+	 * forces this granularity by increasing the requested size if needed.
+	 * When qedr_mmap is called, it will search the list with the updated
+	 * length as a key. To prevent search failures, the length is rounded up
+	 * in advance to PAGE_SIZE.
+	 */
+	mm->key.len = roundup(len, PAGE_SIZE);
+	INIT_LIST_HEAD(&mm->entry);
+
+	mutex_lock(&uctx->mm_list_lock);
+	list_add(&mm->entry, &uctx->mm_head);
+	mutex_unlock(&uctx->mm_list_lock);
+
+	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
+		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+		 (unsigned long long)mm->key.phy_addr,
+		 (unsigned long)mm->key.len, uctx);
+
+	return 0;
+}
+
+static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+			     unsigned long len)
+{
+	bool found = false;
+	struct qedr_mm *mm;
+
+	mutex_lock(&uctx->mm_list_lock);
+	list_for_each_entry(mm, &uctx->mm_head, entry) {
+		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+			continue;
+
+		found = true;
+		break;
+	}
+	mutex_unlock(&uctx->mm_list_lock);
+	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
+		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
+		 mm->key.phy_addr, mm->key.len, uctx, found);
+
+	return found;
+}
+
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
+					struct ib_udata *udata)
+{
+	int rc;
+	struct qedr_ucontext *ctx;
+	struct qedr_alloc_ucontext_resp uresp;
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	struct qed_rdma_add_user_out_params oparams;
+
+	if (!udata)
+		return ERR_PTR(-EFAULT);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
+	if (rc) {
+		DP_ERR(dev,
+		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
+		       rc);
+		goto err;
+	}
+
+	ctx->dpi = oparams.dpi;
+	ctx->dpi_addr = oparams.dpi_addr;
+	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
+	ctx->dpi_size = oparams.dpi_size;
+	INIT_LIST_HEAD(&ctx->mm_head);
+	mutex_init(&ctx->mm_list_lock);
+
+	memset(&uresp, 0, sizeof(uresp));
+
+	uresp.db_pa = ctx->dpi_phys_addr;
+	uresp.db_size = ctx->dpi_size;
+	uresp.max_send_wr = dev->attr.max_sqe;
+	uresp.max_recv_wr = dev->attr.max_rqe;
+	uresp.max_srq_wr = dev->attr.max_srq_wr;
+	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
+	uresp.max_cqes = QEDR_MAX_CQES;
+
+	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rc)
+		goto err;
+
+	ctx->dev = dev;
+
+	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
+	if (rc)
+		goto err;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
+		 &ctx->ibucontext);
+	return &ctx->ibucontext;
+
+err:
+	kfree(ctx);
+	return ERR_PTR(rc);
+}
+
+int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
+	struct qedr_mm *mm, *tmp;
+	int status = 0;
+
+	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
+		 uctx);
+	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
+
+	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
+			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+			 mm->key.phy_addr, mm->key.len, uctx);
+		list_del(&mm->entry);
+		kfree(mm);
+	}
+
+	kfree(uctx);
+	return status;
+}
+
+int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
+	struct qedr_dev *dev = get_qedr_dev(context->device);
+	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
+	u64 unmapped_db = dev->db_phys_addr;
+	unsigned long len = (vma->vm_end - vma->vm_start);
+	int rc = 0;
+	bool found;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT,
+		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
+		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
+	if (vma->vm_start & (PAGE_SIZE - 1)) {
+		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
+		       vma->vm_start);
+		return -EINVAL;
+	}
+
+	found = qedr_search_mmap(ucontext, vm_page, len);
+	if (!found) {
+		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+		       vma->vm_pgoff);
+		return -EINVAL;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+
+	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
+						     dev->db_size))) {
+		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+		if (vma->vm_flags & VM_READ) {
+			DP_ERR(dev, "Trying to map doorbell bar for read\n");
+			return -EPERM;
+		}
+
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+					PAGE_SIZE, vma->vm_page_prot);
+	} else {
+		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
+		rc = remap_pfn_range(vma, vma->vm_start,
+				     vma->vm_pgoff, len, vma->vm_page_prot);
+	}
+	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
new file mode 100644
index 000000000000..9472044dd587
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -0,0 +1,52 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_VERBS_H__
+#define __QEDR_VERBS_H__
+
+int qedr_query_device(struct ib_device *ibdev,
+		      struct ib_device_attr *attr, struct ib_udata *udata);
+int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
+int qedr_modify_port(struct ib_device *, u8 port, int mask,
+		     struct ib_port_modify *props);
+
+int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
+
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
+int qedr_dealloc_ucontext(struct ib_ucontext *);
+
+int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+		 unsigned int index, void **context);
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+		 unsigned int index, const union ib_gid *gid,
+		 const struct ib_gid_attr *attr, void **context);
+#endif
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
new file mode 100644
index 000000000000..f7c7fff7a877
--- /dev/null
+++ b/include/uapi/rdma/qedr-abi.h
@@ -0,0 +1,53 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_USER_H__
+#define __QEDR_USER_H__
+
+#include <linux/types.h>
+
+#define QEDR_ABI_VERSION		(8)
+
+/* user kernel communication data structures. */
+
+struct qedr_alloc_ucontext_resp {
+	__u64 db_pa;
+	__u32 db_size;
+
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_srq_wr;
+	__u32 sges_per_send_wr;
+	__u32 sges_per_recv_wr;
+	__u32 sges_per_srq_wr;
+	__u32 max_cqes;
+};
+#endif /* __QEDR_USER_H__ */
-- 
cgit v1.2.3


From a7efd7773e31b60f695816c27393fc717a9df127 Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Mon, 10 Oct 2016 13:15:33 +0300
Subject: qedr: Add support for PD,PKEY and CQ verbs

Add support for protection domain and completion queue verbs.

Signed-off-by: Rajesh Borundia <rajesh.borundia@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qedr/main.c          |  49 ++-
 drivers/infiniband/hw/qedr/qedr.h          |  78 +++++
 drivers/infiniband/hw/qedr/qedr_hsi_rdma.h |  79 +++++
 drivers/infiniband/hw/qedr/verbs.c         | 539 +++++++++++++++++++++++++++++
 drivers/infiniband/hw/qedr/verbs.h         |  14 +
 include/uapi/rdma/qedr-abi.h               |  19 +
 6 files changed, 777 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 55c4f565c214..35928abb6b63 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -87,7 +87,14 @@ static int qedr_register_device(struct qedr_dev *dev)
 
 	dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
 				     QEDR_UVERBS(QUERY_DEVICE) |
-				     QEDR_UVERBS(QUERY_PORT);
+				     QEDR_UVERBS(QUERY_PORT) |
+				     QEDR_UVERBS(ALLOC_PD) |
+				     QEDR_UVERBS(DEALLOC_PD) |
+				     QEDR_UVERBS(CREATE_COMP_CHANNEL) |
+				     QEDR_UVERBS(CREATE_CQ) |
+				     QEDR_UVERBS(RESIZE_CQ) |
+				     QEDR_UVERBS(DESTROY_CQ) |
+				     QEDR_UVERBS(REQ_NOTIFY_CQ);
 
 	dev->ibdev.phys_port_cnt = 1;
 	dev->ibdev.num_comp_vectors = dev->num_cnq;
@@ -105,6 +112,16 @@ static int qedr_register_device(struct qedr_dev *dev)
 	dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
 	dev->ibdev.mmap = qedr_mmap;
 
+	dev->ibdev.alloc_pd = qedr_alloc_pd;
+	dev->ibdev.dealloc_pd = qedr_dealloc_pd;
+
+	dev->ibdev.create_cq = qedr_create_cq;
+	dev->ibdev.destroy_cq = qedr_destroy_cq;
+	dev->ibdev.resize_cq = qedr_resize_cq;
+	dev->ibdev.req_notify_cq = qedr_arm_cq;
+
+	dev->ibdev.query_pkey = qedr_query_pkey;
+
 	dev->ibdev.dma_device = &dev->pdev->dev;
 
 	dev->ibdev.get_link_layer = qedr_link_layer;
@@ -322,6 +339,8 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
 {
 	u16 hw_comp_cons, sw_comp_cons;
 	struct qedr_cnq *cnq = handle;
+	struct regpair *cq_handle;
+	struct qedr_cq *cq;
 
 	qed_sb_ack(cnq->sb, IGU_INT_DISABLE, 0);
 
@@ -334,8 +353,36 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
 	rmb();
 
 	while (sw_comp_cons != hw_comp_cons) {
+		cq_handle = (struct regpair *)qed_chain_consume(&cnq->pbl);
+		cq = (struct qedr_cq *)(uintptr_t)HILO_U64(cq_handle->hi,
+				cq_handle->lo);
+
+		if (cq == NULL) {
+			DP_ERR(cnq->dev,
+			       "Received NULL CQ cq_handle->hi=%d cq_handle->lo=%d sw_comp_cons=%d hw_comp_cons=%d\n",
+			       cq_handle->hi, cq_handle->lo, sw_comp_cons,
+			       hw_comp_cons);
+
+			break;
+		}
+
+		if (cq->sig != QEDR_CQ_MAGIC_NUMBER) {
+			DP_ERR(cnq->dev,
+			       "Problem with cq signature, cq_handle->hi=%d ch_handle->lo=%d cq=%p\n",
+			       cq_handle->hi, cq_handle->lo, cq);
+			break;
+		}
+
+		cq->arm_flags = 0;
+
+		if (cq->ibcq.comp_handler)
+			(*cq->ibcq.comp_handler)
+				(&cq->ibcq, cq->ibcq.cq_context);
+
 		sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
+
 		cnq->n_comp++;
+
 	}
 
 	qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 2091c0d6e67d..9e2846a599ce 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -50,6 +50,10 @@
 
 #define QEDR_MSG_INIT "INIT"
 #define QEDR_MSG_MISC "MISC"
+#define QEDR_MSG_CQ   "  CQ"
+#define QEDR_MSG_MR   "  MR"
+
+#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
 
 struct qedr_dev;
 
@@ -181,6 +185,12 @@ struct qedr_dev {
 #define QEDR_ROCE_PKEY_TABLE_LEN 1
 #define QEDR_ROCE_PKEY_DEFAULT 0xffff
 
+struct qedr_pbl {
+	struct list_head list_entry;
+	void *va;
+	dma_addr_t pa;
+};
+
 struct qedr_ucontext {
 	struct ib_ucontext ibucontext;
 	struct qedr_dev *dev;
@@ -196,6 +206,64 @@ struct qedr_ucontext {
 	struct mutex mm_list_lock;
 };
 
+union db_prod64 {
+	struct rdma_pwm_val32_data data;
+	u64 raw;
+};
+
+enum qedr_cq_type {
+	QEDR_CQ_TYPE_GSI,
+	QEDR_CQ_TYPE_KERNEL,
+	QEDR_CQ_TYPE_USER,
+};
+
+struct qedr_pbl_info {
+	u32 num_pbls;
+	u32 num_pbes;
+	u32 pbl_size;
+	u32 pbe_size;
+	bool two_layered;
+};
+
+struct qedr_userq {
+	struct ib_umem *umem;
+	struct qedr_pbl_info pbl_info;
+	struct qedr_pbl *pbl_tbl;
+	u64 buf_addr;
+	size_t buf_len;
+};
+
+struct qedr_cq {
+	struct ib_cq ibcq;
+
+	enum qedr_cq_type cq_type;
+	u32 sig;
+
+	u16 icid;
+
+	/* Lock to protect multiplem CQ's */
+	spinlock_t cq_lock;
+	u8 arm_flags;
+	struct qed_chain pbl;
+
+	void __iomem *db_addr;
+	union db_prod64 db;
+
+	u8 pbl_toggle;
+	union rdma_cqe *latest_cqe;
+	union rdma_cqe *toggle_cqe;
+
+	u32 cq_cons;
+
+	struct qedr_userq q;
+};
+
+struct qedr_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;
+	struct qedr_ucontext *uctx;
+};
+
 struct qedr_mm {
 	struct {
 		u64 phy_addr;
@@ -215,4 +283,14 @@ static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev)
 	return container_of(ibdev, struct qedr_dev, ibdev);
 }
 
+static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct qedr_pd, ibpd);
+}
+
+static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct qedr_cq, ibcq);
+}
+
 #endif
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 3e508fbd5e78..84f6520107cc 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -47,6 +47,19 @@ struct rdma_cqe_responder {
 	__le32 imm_data_hi;
 	__le16 rq_cons;
 	u8 flags;
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_RESPONDER_TYPE_MASK        0x3
+#define RDMA_CQE_RESPONDER_TYPE_SHIFT       1
+#define RDMA_CQE_RESPONDER_INV_FLG_MASK     0x1
+#define RDMA_CQE_RESPONDER_INV_FLG_SHIFT    3
+#define RDMA_CQE_RESPONDER_IMM_FLG_MASK     0x1
+#define RDMA_CQE_RESPONDER_IMM_FLG_SHIFT    4
+#define RDMA_CQE_RESPONDER_RDMA_FLG_MASK    0x1
+#define RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT   5
+#define RDMA_CQE_RESPONDER_RESERVED2_MASK   0x3
+#define RDMA_CQE_RESPONDER_RESERVED2_SHIFT  6
+	u8 status;
 };
 
 struct rdma_cqe_requester {
@@ -58,6 +71,12 @@ struct rdma_cqe_requester {
 	__le32 reserved3;
 	__le16 reserved4;
 	u8 flags;
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_REQUESTER_TYPE_MASK        0x3
+#define RDMA_CQE_REQUESTER_TYPE_SHIFT       1
+#define RDMA_CQE_REQUESTER_RESERVED5_MASK   0x1F
+#define RDMA_CQE_REQUESTER_RESERVED5_SHIFT  3
 	u8 status;
 };
 
@@ -66,6 +85,12 @@ struct rdma_cqe_common {
 	struct regpair qp_handle;
 	__le16 reserved1[7];
 	u8 flags;
+#define RDMA_CQE_COMMON_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_COMMON_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_COMMON_TYPE_MASK        0x3
+#define RDMA_CQE_COMMON_TYPE_SHIFT       1
+#define RDMA_CQE_COMMON_RESERVED2_MASK   0x1F
+#define RDMA_CQE_COMMON_RESERVED2_SHIFT  3
 	u8 status;
 };
 
@@ -76,6 +101,45 @@ union rdma_cqe {
 	struct rdma_cqe_common cmn;
 };
 
+/* * CQE requester status enumeration */
+enum rdma_cqe_requester_status_enum {
+	RDMA_CQE_REQ_STS_OK,
+	RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR,
+	RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
+	RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
+	RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+	MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
+};
+
+/* CQE responder status enumeration */
+enum rdma_cqe_responder_status_enum {
+	RDMA_CQE_RESP_STS_OK,
+	RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR,
+	RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR,
+	RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR,
+	RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR,
+	MAX_RDMA_CQE_RESPONDER_STATUS_ENUM
+};
+
+/* CQE type enumeration */
+enum rdma_cqe_type {
+	RDMA_CQE_TYPE_REQUESTER,
+	RDMA_CQE_TYPE_RESPONDER_RQ,
+	RDMA_CQE_TYPE_RESPONDER_SRQ,
+	RDMA_CQE_TYPE_INVALID,
+	MAX_RDMA_CQE_TYPE
+};
+
 struct rdma_sq_sge {
 	__le32 length;
 	struct regpair	addr;
@@ -93,4 +157,19 @@ struct rdma_srq_sge {
 	__le32 length;
 	__le32 l_key;
 };
+
+/* Rdma doorbell data for CQ */
+struct rdma_pwm_val32_data {
+	__le16 icid;
+	u8 agg_flags;
+	u8 params;
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK    0x3
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT   0
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK  0x1
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
+#define RDMA_PWM_VAL32_DATA_RESERVED_MASK   0x1F
+#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT  3
+	__le32 value;
+};
+
 #endif /* __QED_HSI_RDMA__ */
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b9dcade1cb9f..b525c6cf1df0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -49,6 +49,17 @@
 #include "verbs.h"
 #include <rdma/qedr-abi.h>
 
+#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+
+int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
+		return -EINVAL;
+
+	*pkey = QEDR_ROCE_PKEY_DEFAULT;
+	return 0;
+}
+
 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
 		   union ib_gid *sgid)
 {
@@ -454,3 +465,531 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
 	return rc;
 }
+
+struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
+			    struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	struct qedr_ucontext *uctx = NULL;
+	struct qedr_alloc_pd_uresp uresp;
+	struct qedr_pd *pd;
+	u16 pd_id;
+	int rc;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
+		 (udata && context) ? "User Lib" : "Kernel");
+
+	if (!dev->rdma_ctx) {
+		DP_ERR(dev, "invlaid RDMA context\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
+
+	uresp.pd_id = pd_id;
+	pd->pd_id = pd_id;
+
+	if (udata && context) {
+		rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (rc)
+			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
+		uctx = get_qedr_ucontext(context);
+		uctx->pd = pd;
+		pd->uctx = uctx;
+	}
+
+	return &pd->ibpd;
+}
+
+int qedr_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+	struct qedr_pd *pd = get_qedr_pd(ibpd);
+
+	if (!pd)
+		pr_err("Invalid PD received in dealloc_pd\n");
+
+	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
+	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+
+	kfree(pd);
+
+	return 0;
+}
+
+static void qedr_free_pbl(struct qedr_dev *dev,
+			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
+{
+	struct pci_dev *pdev = dev->pdev;
+	int i;
+
+	for (i = 0; i < pbl_info->num_pbls; i++) {
+		if (!pbl[i].va)
+			continue;
+		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
+				  pbl[i].va, pbl[i].pa);
+	}
+
+	kfree(pbl);
+}
+
+#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
+#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
+
+#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
+#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
+#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
+
+static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
+					   struct qedr_pbl_info *pbl_info,
+					   gfp_t flags)
+{
+	struct pci_dev *pdev = dev->pdev;
+	struct qedr_pbl *pbl_table;
+	dma_addr_t *pbl_main_tbl;
+	dma_addr_t pa;
+	void *va;
+	int i;
+
+	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
+	if (!pbl_table)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < pbl_info->num_pbls; i++) {
+		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
+					&pa, flags);
+		if (!va)
+			goto err;
+
+		memset(va, 0, pbl_info->pbl_size);
+		pbl_table[i].va = va;
+		pbl_table[i].pa = pa;
+	}
+
+	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
+	 * the first one with physical pointers to all of the rest
+	 */
+	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
+	for (i = 0; i < pbl_info->num_pbls - 1; i++)
+		pbl_main_tbl[i] = pbl_table[i + 1].pa;
+
+	return pbl_table;
+
+err:
+	for (i--; i >= 0; i--)
+		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
+				  pbl_table[i].va, pbl_table[i].pa);
+
+	qedr_free_pbl(dev, pbl_info, pbl_table);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
+				struct qedr_pbl_info *pbl_info,
+				u32 num_pbes, int two_layer_capable)
+{
+	u32 pbl_capacity;
+	u32 pbl_size;
+	u32 num_pbls;
+
+	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
+		if (num_pbes > MAX_PBES_TWO_LAYER) {
+			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
+			       num_pbes);
+			return -EINVAL;
+		}
+
+		/* calculate required pbl page size */
+		pbl_size = MIN_FW_PBL_PAGE_SIZE;
+		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
+			       NUM_PBES_ON_PAGE(pbl_size);
+
+		while (pbl_capacity < num_pbes) {
+			pbl_size *= 2;
+			pbl_capacity = pbl_size / sizeof(u64);
+			pbl_capacity = pbl_capacity * pbl_capacity;
+		}
+
+		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
+		num_pbls++;	/* One for the layer0 ( points to the pbls) */
+		pbl_info->two_layered = true;
+	} else {
+		/* One layered PBL */
+		num_pbls = 1;
+		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
+				 roundup_pow_of_two((num_pbes * sizeof(u64))));
+		pbl_info->two_layered = false;
+	}
+
+	pbl_info->num_pbls = num_pbls;
+	pbl_info->pbl_size = pbl_size;
+	pbl_info->num_pbes = num_pbes;
+
+	DP_DEBUG(dev, QEDR_MSG_MR,
+		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
+		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
+
+	return 0;
+}
+
+static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
+			       struct qedr_pbl *pbl,
+			       struct qedr_pbl_info *pbl_info)
+{
+	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+	struct qedr_pbl *pbl_tbl;
+	struct scatterlist *sg;
+	struct regpair *pbe;
+	int entry;
+	u32 addr;
+
+	if (!pbl_info->num_pbes)
+		return;
+
+	/* If we have a two layered pbl, the first pbl points to the rest
+	 * of the pbls and the first entry lays on the second pbl in the table
+	 */
+	if (pbl_info->two_layered)
+		pbl_tbl = &pbl[1];
+	else
+		pbl_tbl = pbl;
+
+	pbe = (struct regpair *)pbl_tbl->va;
+	if (!pbe) {
+		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
+		return;
+	}
+
+	pbe_cnt = 0;
+
+	shift = ilog2(umem->page_size);
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		pages = sg_dma_len(sg) >> shift;
+		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+			/* store the page address in pbe */
+			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
+					      umem->page_size * pg_cnt);
+			addr = upper_32_bits(sg_dma_address(sg) +
+					     umem->page_size * pg_cnt);
+			pbe->hi = cpu_to_le32(addr);
+			pbe_cnt++;
+			total_num_pbes++;
+			pbe++;
+
+			if (total_num_pbes == pbl_info->num_pbes)
+				return;
+
+			/* If the given pbl is full storing the pbes,
+			 * move to next pbl.
+			 */
+			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+				pbl_tbl++;
+				pbe = (struct regpair *)pbl_tbl->va;
+				pbe_cnt = 0;
+			}
+		}
+	}
+}
+
+static int qedr_copy_cq_uresp(struct qedr_dev *dev,
+			      struct qedr_cq *cq, struct ib_udata *udata)
+{
+	struct qedr_create_cq_uresp uresp;
+	int rc;
+
+	memset(&uresp, 0, sizeof(uresp));
+
+	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+	uresp.icid = cq->icid;
+
+	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rc)
+		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
+
+	return rc;
+}
+
+static void consume_cqe(struct qedr_cq *cq)
+{
+	if (cq->latest_cqe == cq->toggle_cqe)
+		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+
+	cq->latest_cqe = qed_chain_consume(&cq->pbl);
+}
+
+static inline int qedr_align_cq_entries(int entries)
+{
+	u64 size, aligned_size;
+
+	/* We allocate an extra entry that we don't report to the FW. */
+	size = (entries + 1) * QEDR_CQE_SIZE;
+	aligned_size = ALIGN(size, PAGE_SIZE);
+
+	return aligned_size / QEDR_CQE_SIZE;
+}
+
+static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
+				       struct qedr_dev *dev,
+				       struct qedr_userq *q,
+				       u64 buf_addr, size_t buf_len,
+				       int access, int dmasync)
+{
+	int page_cnt;
+	int rc;
+
+	q->buf_addr = buf_addr;
+	q->buf_len = buf_len;
+	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
+	if (IS_ERR(q->umem)) {
+		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
+		       PTR_ERR(q->umem));
+		return PTR_ERR(q->umem);
+	}
+
+	page_cnt = ib_umem_page_count(q->umem);
+	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+	if (rc)
+		goto err0;
+
+	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
+	if (IS_ERR_OR_NULL(q->pbl_tbl))
+		goto err0;
+
+	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+
+	return 0;
+
+err0:
+	ib_umem_release(q->umem);
+
+	return rc;
+}
+
+static inline void qedr_init_cq_params(struct qedr_cq *cq,
+				       struct qedr_ucontext *ctx,
+				       struct qedr_dev *dev, int vector,
+				       int chain_entries, int page_cnt,
+				       u64 pbl_ptr,
+				       struct qed_rdma_create_cq_in_params
+				       *params)
+{
+	memset(params, 0, sizeof(*params));
+	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
+	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
+	params->cnq_id = vector;
+	params->cq_size = chain_entries - 1;
+	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
+	params->pbl_num_pages = page_cnt;
+	params->pbl_ptr = pbl_ptr;
+	params->pbl_two_level = 0;
+}
+
+static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
+{
+	/* Flush data before signalling doorbell */
+	wmb();
+	cq->db.data.agg_flags = flags;
+	cq->db.data.value = cpu_to_le32(cons);
+	writeq(cq->db.raw, cq->db_addr);
+
+	/* Make sure write would stick */
+	mmiowb();
+}
+
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct qedr_cq *cq = get_qedr_cq(ibcq);
+	unsigned long sflags;
+
+	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+		return 0;
+
+	spin_lock_irqsave(&cq->cq_lock, sflags);
+
+	cq->arm_flags = 0;
+
+	if (flags & IB_CQ_SOLICITED)
+		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
+
+	if (flags & IB_CQ_NEXT_COMP)
+		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
+
+	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
+
+	spin_unlock_irqrestore(&cq->cq_lock, sflags);
+
+	return 0;
+}
+
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
+{
+	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
+	struct qed_rdma_destroy_cq_out_params destroy_oparams;
+	struct qed_rdma_destroy_cq_in_params destroy_iparams;
+	struct qedr_dev *dev = get_qedr_dev(ibdev);
+	struct qed_rdma_create_cq_in_params params;
+	struct qedr_create_cq_ureq ureq;
+	int vector = attr->comp_vector;
+	int entries = attr->cqe;
+	struct qedr_cq *cq;
+	int chain_entries;
+	int page_cnt;
+	u64 pbl_ptr;
+	u16 icid;
+	int rc;
+
+	DP_DEBUG(dev, QEDR_MSG_INIT,
+		 "create_cq: called from %s. entries=%d, vector=%d\n",
+		 udata ? "User Lib" : "Kernel", entries, vector);
+
+	if (entries > QEDR_MAX_CQES) {
+		DP_ERR(dev,
+		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
+		       entries, QEDR_MAX_CQES);
+		return ERR_PTR(-EINVAL);
+	}
+
+	chain_entries = qedr_align_cq_entries(entries);
+	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return ERR_PTR(-ENOMEM);
+
+	if (udata) {
+		memset(&ureq, 0, sizeof(ureq));
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+			DP_ERR(dev,
+			       "create cq: problem copying data from user space\n");
+			goto err0;
+		}
+
+		if (!ureq.len) {
+			DP_ERR(dev,
+			       "create cq: cannot create a cq with 0 entries\n");
+			goto err0;
+		}
+
+		cq->cq_type = QEDR_CQ_TYPE_USER;
+
+		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
+					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
+		if (rc)
+			goto err0;
+
+		pbl_ptr = cq->q.pbl_tbl->pa;
+		page_cnt = cq->q.pbl_info.num_pbes;
+	} else {
+		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
+
+		rc = dev->ops->common->chain_alloc(dev->cdev,
+						   QED_CHAIN_USE_TO_CONSUME,
+						   QED_CHAIN_MODE_PBL,
+						   QED_CHAIN_CNT_TYPE_U32,
+						   chain_entries,
+						   sizeof(union rdma_cqe),
+						   &cq->pbl);
+		if (rc)
+			goto err1;
+
+		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
+		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
+	}
+
+	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
+			    pbl_ptr, &params);
+
+	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
+	if (rc)
+		goto err2;
+
+	cq->icid = icid;
+	cq->sig = QEDR_CQ_MAGIC_NUMBER;
+	spin_lock_init(&cq->cq_lock);
+
+	if (ib_ctx) {
+		rc = qedr_copy_cq_uresp(dev, cq, udata);
+		if (rc)
+			goto err3;
+	} else {
+		/* Generate doorbell address. */
+		cq->db_addr = dev->db_addr +
+		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+		cq->db.data.icid = cq->icid;
+		cq->db.data.params = DB_AGG_CMD_SET <<
+		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
+
+		/* point to the very last element, passing it we will toggle */
+		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
+		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+		cq->latest_cqe = NULL;
+		consume_cqe(cq);
+		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_CQ,
+		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
+		 cq->icid, cq, params.cq_size);
+
+	return &cq->ibcq;
+
+err3:
+	destroy_iparams.icid = cq->icid;
+	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
+				  &destroy_oparams);
+err2:
+	if (udata)
+		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+	else
+		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+err1:
+	if (udata)
+		ib_umem_release(cq->q.umem);
+err0:
+	kfree(cq);
+	return ERR_PTR(-EINVAL);
+}
+
+int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+	struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
+
+	return 0;
+}
+
+int qedr_destroy_cq(struct ib_cq *ibcq)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+	struct qed_rdma_destroy_cq_out_params oparams;
+	struct qed_rdma_destroy_cq_in_params iparams;
+	struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+
+	/* GSIs CQs are handled by driver, so they don't exist in the FW */
+	if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
+		iparams.icid = cq->icid;
+		dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+	}
+
+	if (ibcq->uobject && ibcq->uobject->context) {
+		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+		ib_umem_release(cq->q.umem);
+	}
+
+	kfree(cq);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 9472044dd587..36c8a692f740 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -40,6 +40,8 @@ int qedr_modify_port(struct ib_device *, u8 port, int mask,
 
 int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
 
+int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+
 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
 int qedr_dealloc_ucontext(struct ib_ucontext *);
 
@@ -49,4 +51,16 @@ int qedr_del_gid(struct ib_device *device, u8 port_num,
 int qedr_add_gid(struct ib_device *device, u8 port_num,
 		 unsigned int index, const union ib_gid *gid,
 		 const struct ib_gid_attr *attr, void **context);
+struct ib_pd *qedr_alloc_pd(struct ib_device *,
+			    struct ib_ucontext *, struct ib_udata *);
+int qedr_dealloc_pd(struct ib_pd *pd);
+
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+			     const struct ib_cq_init_attr *attr,
+			     struct ib_ucontext *ib_ctx,
+			     struct ib_udata *udata);
+int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
+int qedr_destroy_cq(struct ib_cq *);
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
 #endif
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index f7c7fff7a877..b0fc5f2125e0 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -50,4 +50,23 @@ struct qedr_alloc_ucontext_resp {
 	__u32 sges_per_srq_wr;
 	__u32 max_cqes;
 };
+
+struct qedr_alloc_pd_ureq {
+	__u64 rsvd1;
+};
+
+struct qedr_alloc_pd_uresp {
+	__u32 pd_id;
+};
+
+struct qedr_create_cq_ureq {
+	__u64 addr;
+	__u64 len;
+};
+
+struct qedr_create_cq_uresp {
+	__u32 db_offset;
+	__u16 icid;
+};
+
 #endif /* __QEDR_USER_H__ */
-- 
cgit v1.2.3


From cecbcddf6461a11ce229e80bb3981415220c9763 Mon Sep 17 00:00:00 2001
From: Ram Amrani <Ram.Amrani@cavium.com>
Date: Mon, 10 Oct 2016 13:15:34 +0300
Subject: qedr: Add support for QP verbs

Add support for Queue Pair verbs which adds, deletes,
modifies and queries Queue Pairs.

Signed-off-by: Rajesh Borundia <rajesh.borundia@cavium.com>
Signed-off-by: Ram Amrani <Ram.Amrani@cavium.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qedr/main.c          |   15 +-
 drivers/infiniband/hw/qedr/qedr.h          |  125 ++++
 drivers/infiniband/hw/qedr/qedr_cm.h       |   40 +
 drivers/infiniband/hw/qedr/qedr_hsi_rdma.h |   11 +
 drivers/infiniband/hw/qedr/verbs.c         | 1089 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/qedr/verbs.h         |    7 +
 include/uapi/rdma/qedr-abi.h               |   34 +
 7 files changed, 1320 insertions(+), 1 deletion(-)
 create mode 100644 drivers/infiniband/hw/qedr/qedr_cm.h

(limited to 'include/uapi')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 35928abb6b63..13ba47b7b99f 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -48,6 +48,8 @@ MODULE_AUTHOR("QLogic Corporation");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(QEDR_MODULE_VERSION);
 
+#define QEDR_WQ_MULTIPLIER_DFT	(3)
+
 void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
 			    enum ib_event_type type)
 {
@@ -94,7 +96,11 @@ static int qedr_register_device(struct qedr_dev *dev)
 				     QEDR_UVERBS(CREATE_CQ) |
 				     QEDR_UVERBS(RESIZE_CQ) |
 				     QEDR_UVERBS(DESTROY_CQ) |
-				     QEDR_UVERBS(REQ_NOTIFY_CQ);
+				     QEDR_UVERBS(REQ_NOTIFY_CQ) |
+				     QEDR_UVERBS(CREATE_QP) |
+				     QEDR_UVERBS(MODIFY_QP) |
+				     QEDR_UVERBS(QUERY_QP) |
+				     QEDR_UVERBS(DESTROY_QP);
 
 	dev->ibdev.phys_port_cnt = 1;
 	dev->ibdev.num_comp_vectors = dev->num_cnq;
@@ -120,6 +126,11 @@ static int qedr_register_device(struct qedr_dev *dev)
 	dev->ibdev.resize_cq = qedr_resize_cq;
 	dev->ibdev.req_notify_cq = qedr_arm_cq;
 
+	dev->ibdev.create_qp = qedr_create_qp;
+	dev->ibdev.modify_qp = qedr_modify_qp;
+	dev->ibdev.query_qp = qedr_query_qp;
+	dev->ibdev.destroy_qp = qedr_destroy_qp;
+
 	dev->ibdev.query_pkey = qedr_query_pkey;
 
 	dev->ibdev.dma_device = &dev->pdev->dev;
@@ -630,6 +641,8 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 		goto init_err;
 	}
 
+	dev->wq_multiplier = QEDR_WQ_MULTIPLIER_DFT;
+
 	qedr_pci_set_atomic(dev, pdev);
 
 	rc = qedr_alloc_resources(dev);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 9e2846a599ce..e9fe941c48ad 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -52,6 +52,9 @@
 #define QEDR_MSG_MISC "MISC"
 #define QEDR_MSG_CQ   "  CQ"
 #define QEDR_MSG_MR   "  MR"
+#define QEDR_MSG_RQ   "  RQ"
+#define QEDR_MSG_SQ   "  SQ"
+#define QEDR_MSG_QP   "  QP"
 
 #define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
 
@@ -143,6 +146,8 @@ struct qedr_dev {
 	u32			dp_module;
 	u8			dp_level;
 	u8			num_hwfns;
+	uint			wq_multiplier;
+
 };
 
 #define QEDR_MAX_SQ_PBL			(0x8000)
@@ -272,6 +277,122 @@ struct qedr_mm {
 	struct list_head entry;
 };
 
+union db_prod32 {
+	struct rdma_pwm_val16_data data;
+	u32 raw;
+};
+
+struct qedr_qp_hwq_info {
+	/* WQE Elements */
+	struct qed_chain pbl;
+	u64 p_phys_addr_tbl;
+	u32 max_sges;
+
+	/* WQE */
+	u16 prod;
+	u16 cons;
+	u16 wqe_cons;
+	u16 max_wr;
+
+	/* DB */
+	void __iomem *db;
+	union db_prod32 db_data;
+};
+
+#define QEDR_INC_SW_IDX(p_info, index)					\
+	do {								\
+		p_info->index = (p_info->index + 1) &			\
+				qed_chain_get_capacity(p_info->pbl)	\
+	} while (0)
+
+enum qedr_qp_err_bitmap {
+	QEDR_QP_ERR_SQ_FULL = 1,
+	QEDR_QP_ERR_RQ_FULL = 2,
+	QEDR_QP_ERR_BAD_SR = 4,
+	QEDR_QP_ERR_BAD_RR = 8,
+	QEDR_QP_ERR_SQ_PBL_FULL = 16,
+	QEDR_QP_ERR_RQ_PBL_FULL = 32,
+};
+
+struct qedr_qp {
+	struct ib_qp ibqp;	/* must be first */
+	struct qedr_dev *dev;
+
+	struct qedr_qp_hwq_info sq;
+	struct qedr_qp_hwq_info rq;
+
+	u32 max_inline_data;
+
+	/* Lock for QP's */
+	spinlock_t q_lock;
+	struct qedr_cq *sq_cq;
+	struct qedr_cq *rq_cq;
+	struct qedr_srq *srq;
+	enum qed_roce_qp_state state;
+	u32 id;
+	struct qedr_pd *pd;
+	enum ib_qp_type qp_type;
+	struct qed_rdma_qp *qed_qp;
+	u32 qp_id;
+	u16 icid;
+	u16 mtu;
+	int sgid_idx;
+	u32 rq_psn;
+	u32 sq_psn;
+	u32 qkey;
+	u32 dest_qp_num;
+
+	/* Relevant to qps created from kernel space only (ULPs) */
+	u8 prev_wqe_size;
+	u16 wqe_cons;
+	u32 err_bitmap;
+	bool signaled;
+
+	/* SQ shadow */
+	struct {
+		u64 wr_id;
+		enum ib_wc_opcode opcode;
+		u32 bytes_len;
+		u8 wqe_size;
+		bool signaled;
+		dma_addr_t icrc_mapping;
+		u32 *icrc;
+		struct qedr_mr *mr;
+	} *wqe_wr_id;
+
+	/* RQ shadow */
+	struct {
+		u64 wr_id;
+		struct ib_sge sg_list[RDMA_MAX_SGE_PER_RQ_WQE];
+		u8 wqe_size;
+
+		u16 vlan_id;
+		int rc;
+	} *rqe_wr_id;
+
+	/* Relevant to qps created from user space only (applications) */
+	struct qedr_userq usq;
+	struct qedr_userq urq;
+};
+
+static inline int qedr_get_dmac(struct qedr_dev *dev,
+				struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+	union ib_gid zero_sgid = { { 0 } };
+	struct in6_addr in6;
+
+	if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) {
+		DP_ERR(dev, "Local port GID not supported\n");
+		eth_zero_addr(mac_addr);
+		return -EINVAL;
+	}
+
+	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+	ether_addr_copy(mac_addr, ah_attr->dmac);
+
+	return 0;
+}
+
 static inline
 struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext)
 {
@@ -293,4 +414,8 @@ static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
 	return container_of(ibcq, struct qedr_cq, ibcq);
 }
 
+static inline struct qedr_qp *get_qedr_qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct qedr_qp, ibqp);
+}
 #endif
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_cm.h
new file mode 100644
index 000000000000..b8a8b76d77b8
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/qedr_cm.h
@@ -0,0 +1,40 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef LINUX_QEDR_CM_H_
+#define LINUX_QEDR_CM_H_
+
+static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
+{
+	return *(u32 *)(void *)&gid[12];
+}
+
+#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 84f6520107cc..47705598eec6 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -158,6 +158,17 @@ struct rdma_srq_sge {
 	__le32 l_key;
 };
 
+/* Rdma doorbell data for SQ and RQ */
+struct rdma_pwm_val16_data {
+	__le16 icid;
+	__le16 value;
+};
+
+union rdma_pwm_val16_data_union {
+	struct rdma_pwm_val16_data as_struct;
+	__le32 as_dword;
+};
+
 /* Rdma doorbell data for CQ */
 struct rdma_pwm_val32_data {
 	__le16 icid;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b525c6cf1df0..a0d1c5fffb63 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -48,6 +48,7 @@
 #include "qedr.h"
 #include "verbs.h"
 #include <rdma/qedr-abi.h>
+#include "qedr_cm.h"
 
 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
 
@@ -993,3 +994,1091 @@ int qedr_destroy_cq(struct ib_cq *ibcq)
 
 	return 0;
 }
+
+static inline int get_gid_info_from_table(struct ib_qp *ibqp,
+					  struct ib_qp_attr *attr,
+					  int attr_mask,
+					  struct qed_rdma_modify_qp_in_params
+					  *qp_params)
+{
+	enum rdma_network_type nw_type;
+	struct ib_gid_attr gid_attr;
+	union ib_gid gid;
+	u32 ipv4_addr;
+	int rc = 0;
+	int i;
+
+	rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
+			       attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
+	if (rc)
+		return rc;
+
+	if (!memcmp(&gid, &zgid, sizeof(gid)))
+		return -ENOENT;
+
+	if (gid_attr.ndev) {
+		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+
+		dev_put(gid_attr.ndev);
+		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
+		switch (nw_type) {
+		case RDMA_NETWORK_IPV6:
+			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+			       sizeof(qp_params->sgid));
+			memcpy(&qp_params->dgid.bytes[0],
+			       &attr->ah_attr.grh.dgid,
+			       sizeof(qp_params->dgid));
+			qp_params->roce_mode = ROCE_V2_IPV6;
+			SET_FIELD(qp_params->modify_flags,
+				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+			break;
+		case RDMA_NETWORK_IB:
+			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+			       sizeof(qp_params->sgid));
+			memcpy(&qp_params->dgid.bytes[0],
+			       &attr->ah_attr.grh.dgid,
+			       sizeof(qp_params->dgid));
+			qp_params->roce_mode = ROCE_V1;
+			break;
+		case RDMA_NETWORK_IPV4:
+			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
+			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
+			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
+			qp_params->sgid.ipv4_addr = ipv4_addr;
+			ipv4_addr =
+			    qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
+			qp_params->dgid.ipv4_addr = ipv4_addr;
+			SET_FIELD(qp_params->modify_flags,
+				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+			qp_params->roce_mode = ROCE_V2_IPV4;
+			break;
+		}
+	}
+
+	for (i = 0; i < 4; i++) {
+		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
+		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
+	}
+
+	if (qp_params->vlan_id >= VLAN_CFI_MASK)
+		qp_params->vlan_id = 0;
+
+	return 0;
+}
+
+static void qedr_cleanup_user_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+	ib_umem_release(qp->usq.umem);
+}
+
+static void qedr_cleanup_user_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+	ib_umem_release(qp->urq.umem);
+}
+
+static void qedr_cleanup_kernel_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+	kfree(qp->wqe_wr_id);
+}
+
+static void qedr_cleanup_kernel_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+	kfree(qp->rqe_wr_id);
+}
+
+static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
+			       struct ib_qp_init_attr *attrs)
+{
+	struct qedr_device_attr *qattr = &dev->attr;
+
+	/* QP0... attrs->qp_type == IB_QPT_GSI */
+	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
+		DP_DEBUG(dev, QEDR_MSG_QP,
+			 "create qp: unsupported qp type=0x%x requested\n",
+			 attrs->qp_type);
+		return -EINVAL;
+	}
+
+	if (attrs->cap.max_send_wr > qattr->max_sqe) {
+		DP_ERR(dev,
+		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
+		       attrs->cap.max_send_wr, qattr->max_sqe);
+		return -EINVAL;
+	}
+
+	if (attrs->cap.max_inline_data > qattr->max_inline) {
+		DP_ERR(dev,
+		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
+		       attrs->cap.max_inline_data, qattr->max_inline);
+		return -EINVAL;
+	}
+
+	if (attrs->cap.max_send_sge > qattr->max_sge) {
+		DP_ERR(dev,
+		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
+		       attrs->cap.max_send_sge, qattr->max_sge);
+		return -EINVAL;
+	}
+
+	if (attrs->cap.max_recv_sge > qattr->max_sge) {
+		DP_ERR(dev,
+		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
+		       attrs->cap.max_recv_sge, qattr->max_sge);
+		return -EINVAL;
+	}
+
+	/* Unprivileged user space cannot create special QP */
+	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+		DP_ERR(dev,
+		       "create qp: userspace can't create special QPs of type=0x%x\n",
+		       attrs->qp_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
+			       struct qedr_qp *qp)
+{
+	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+	uresp->rq_icid = qp->icid;
+}
+
+static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
+			       struct qedr_qp *qp)
+{
+	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+	uresp->sq_icid = qp->icid + 1;
+}
+
+static int qedr_copy_qp_uresp(struct qedr_dev *dev,
+			      struct qedr_qp *qp, struct ib_udata *udata)
+{
+	struct qedr_create_qp_uresp uresp;
+	int rc;
+
+	memset(&uresp, 0, sizeof(uresp));
+	qedr_copy_sq_uresp(&uresp, qp);
+	qedr_copy_rq_uresp(&uresp, qp);
+
+	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+	uresp.qp_id = qp->qp_id;
+
+	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rc)
+		DP_ERR(dev,
+		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
+		       qp->icid);
+
+	return rc;
+}
+
+static void qedr_set_qp_init_params(struct qedr_dev *dev,
+				    struct qedr_qp *qp,
+				    struct qedr_pd *pd,
+				    struct ib_qp_init_attr *attrs)
+{
+	qp->pd = pd;
+
+	spin_lock_init(&qp->q_lock);
+
+	qp->qp_type = attrs->qp_type;
+	qp->max_inline_data = attrs->cap.max_inline_data;
+	qp->sq.max_sges = attrs->cap.max_send_sge;
+	qp->state = QED_ROCE_QP_STATE_RESET;
+	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
+	qp->sq_cq = get_qedr_cq(attrs->send_cq);
+	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
+	qp->dev = dev;
+
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
+		 pd->pd_id, qp->qp_type, qp->max_inline_data,
+		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+		 qp->sq.max_sges, qp->sq_cq->icid);
+	qp->rq.max_sges = attrs->cap.max_recv_sge;
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+		 qp->rq.max_sges, qp->rq_cq->icid);
+}
+
+static inline void
+qedr_init_qp_user_params(struct qed_rdma_create_qp_in_params *params,
+			 struct qedr_create_qp_ureq *ureq)
+{
+	/* QP handle to be written in CQE */
+	params->qp_handle_lo = ureq->qp_handle_lo;
+	params->qp_handle_hi = ureq->qp_handle_hi;
+}
+
+static inline void
+qedr_init_qp_kernel_doorbell_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	qp->sq.db = dev->db_addr +
+		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+	qp->sq.db_data.data.icid = qp->icid + 1;
+}
+
+static inline void
+qedr_init_qp_kernel_doorbell_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	qp->rq.db = dev->db_addr +
+		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+	qp->rq.db_data.data.icid = qp->icid;
+}
+
+static inline int
+qedr_init_qp_kernel_params_rq(struct qedr_dev *dev,
+			      struct qedr_qp *qp, struct ib_qp_init_attr *attrs)
+{
+	/* Allocate driver internal RQ array */
+	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
+				GFP_KERNEL);
+	if (!qp->rqe_wr_id)
+		return -ENOMEM;
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "RQ max_wr set to %d.\n", qp->rq.max_wr);
+
+	return 0;
+}
+
+static inline int
+qedr_init_qp_kernel_params_sq(struct qedr_dev *dev,
+			      struct qedr_qp *qp,
+			      struct ib_qp_init_attr *attrs,
+			      struct qed_rdma_create_qp_in_params *params)
+{
+	u32 temp_max_wr;
+
+	/* Allocate driver internal SQ array */
+	temp_max_wr = attrs->cap.max_send_wr * dev->wq_multiplier;
+	temp_max_wr = min_t(u32, temp_max_wr, dev->attr.max_sqe);
+
+	/* temp_max_wr < attr->max_sqe < u16 so the casting is safe */
+	qp->sq.max_wr = (u16)temp_max_wr;
+	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
+				GFP_KERNEL);
+	if (!qp->wqe_wr_id)
+		return -ENOMEM;
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "SQ max_wr set to %d.\n", qp->sq.max_wr);
+
+	/* QP handle to be written in CQE */
+	params->qp_handle_lo = lower_32_bits((uintptr_t)qp);
+	params->qp_handle_hi = upper_32_bits((uintptr_t)qp);
+
+	return 0;
+}
+
+static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev,
+					 struct qedr_qp *qp,
+					 struct ib_qp_init_attr *attrs)
+{
+	u32 n_sq_elems, n_sq_entries;
+	int rc;
+
+	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
+	 * the ring. The ring should allow at least a single WR, even if the
+	 * user requested none, due to allocation issues.
+	 */
+	n_sq_entries = attrs->cap.max_send_wr;
+	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
+	n_sq_entries = max_t(u32, n_sq_entries, 1);
+	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+	rc = dev->ops->common->chain_alloc(dev->cdev,
+					   QED_CHAIN_USE_TO_PRODUCE,
+					   QED_CHAIN_MODE_PBL,
+					   QED_CHAIN_CNT_TYPE_U32,
+					   n_sq_elems,
+					   QEDR_SQE_ELEMENT_SIZE,
+					   &qp->sq.pbl);
+	if (rc) {
+		DP_ERR(dev, "failed to allocate QP %p SQ\n", qp);
+		return rc;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_SQ,
+		 "SQ Pbl base addr = %llx max_send_wr=%d max_wr=%d capacity=%d, rc=%d\n",
+		 qed_chain_get_pbl_phys(&qp->sq.pbl), attrs->cap.max_send_wr,
+		 n_sq_entries, qed_chain_get_capacity(&qp->sq.pbl), rc);
+	return 0;
+}
+
+static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev,
+					 struct qedr_qp *qp,
+					 struct ib_qp_init_attr *attrs)
+{
+	u32 n_rq_elems, n_rq_entries;
+	int rc;
+
+	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
+	 * the ring. There ring should allow at least a single WR, even if the
+	 * user requested none, due to allocation issues.
+	 */
+	n_rq_entries = max_t(u32, attrs->cap.max_recv_wr, 1);
+	n_rq_elems = n_rq_entries * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+	rc = dev->ops->common->chain_alloc(dev->cdev,
+					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+					   QED_CHAIN_MODE_PBL,
+					   QED_CHAIN_CNT_TYPE_U32,
+					   n_rq_elems,
+					   QEDR_RQE_ELEMENT_SIZE,
+					   &qp->rq.pbl);
+
+	if (rc) {
+		DP_ERR(dev, "failed to allocate memory for QP %p RQ\n", qp);
+		return -ENOMEM;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_RQ,
+		 "RQ Pbl base addr = %llx max_recv_wr=%d max_wr=%d capacity=%d, rc=%d\n",
+		 qed_chain_get_pbl_phys(&qp->rq.pbl), attrs->cap.max_recv_wr,
+		 n_rq_entries, qed_chain_get_capacity(&qp->rq.pbl), rc);
+
+	/* n_rq_entries < u16 so the casting is safe */
+	qp->rq.max_wr = (u16)n_rq_entries;
+
+	return 0;
+}
+
+static inline void
+qedr_init_qp_in_params_sq(struct qedr_dev *dev,
+			  struct qedr_pd *pd,
+			  struct qedr_qp *qp,
+			  struct ib_qp_init_attr *attrs,
+			  struct ib_udata *udata,
+			  struct qed_rdma_create_qp_in_params *params)
+{
+	/* QP handle to be written in an async event */
+	params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp);
+	params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp);
+
+	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
+	params->fmr_and_reserved_lkey = !udata;
+	params->pd = pd->pd_id;
+	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+	params->max_sq_sges = 0;
+	params->stats_queue = 0;
+
+	if (udata) {
+		params->sq_num_pages = qp->usq.pbl_info.num_pbes;
+		params->sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+	} else {
+		params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
+		params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
+	}
+}
+
+static inline void
+qedr_init_qp_in_params_rq(struct qedr_qp *qp,
+			  struct ib_qp_init_attr *attrs,
+			  struct ib_udata *udata,
+			  struct qed_rdma_create_qp_in_params *params)
+{
+	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+	params->srq_id = 0;
+	params->use_srq = false;
+
+	if (udata) {
+		params->rq_num_pages = qp->urq.pbl_info.num_pbes;
+		params->rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+	} else {
+		params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
+		params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
+	}
+}
+
+static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n",
+		 qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr,
+		 qp->urq.buf_len);
+}
+
+static inline int qedr_init_user_qp(struct ib_ucontext *ib_ctx,
+				    struct qedr_dev *dev,
+				    struct qedr_qp *qp,
+				    struct qedr_create_qp_ureq *ureq)
+{
+	int rc;
+
+	/* SQ - read access only (0), dma sync not required (0) */
+	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq->sq_addr,
+				  ureq->sq_len, 0, 0);
+	if (rc)
+		return rc;
+
+	/* RQ - read access only (0), dma sync not required (0) */
+	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq->rq_addr,
+				  ureq->rq_len, 0, 0);
+
+	if (rc)
+		qedr_cleanup_user_sq(dev, qp);
+	return rc;
+}
+
+static inline int
+qedr_init_kernel_qp(struct qedr_dev *dev,
+		    struct qedr_qp *qp,
+		    struct ib_qp_init_attr *attrs,
+		    struct qed_rdma_create_qp_in_params *params)
+{
+	int rc;
+
+	rc = qedr_init_qp_kernel_sq(dev, qp, attrs);
+	if (rc) {
+		DP_ERR(dev, "failed to init kernel QP %p SQ\n", qp);
+		return rc;
+	}
+
+	rc = qedr_init_qp_kernel_params_sq(dev, qp, attrs, params);
+	if (rc) {
+		dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+		DP_ERR(dev, "failed to init kernel QP %p SQ params\n", qp);
+		return rc;
+	}
+
+	rc = qedr_init_qp_kernel_rq(dev, qp, attrs);
+	if (rc) {
+		qedr_cleanup_kernel_sq(dev, qp);
+		DP_ERR(dev, "failed to init kernel QP %p RQ\n", qp);
+		return rc;
+	}
+
+	rc = qedr_init_qp_kernel_params_rq(dev, qp, attrs);
+	if (rc) {
+		DP_ERR(dev, "failed to init kernel QP %p RQ params\n", qp);
+		qedr_cleanup_kernel_sq(dev, qp);
+		dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+		return rc;
+	}
+
+	return rc;
+}
+
+struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
+			     struct ib_qp_init_attr *attrs,
+			     struct ib_udata *udata)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+	struct qed_rdma_create_qp_out_params out_params;
+	struct qed_rdma_create_qp_in_params in_params;
+	struct qedr_pd *pd = get_qedr_pd(ibpd);
+	struct ib_ucontext *ib_ctx = NULL;
+	struct qedr_ucontext *ctx = NULL;
+	struct qedr_create_qp_ureq ureq;
+	struct qedr_qp *qp;
+	int rc = 0;
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
+		 udata ? "user library" : "kernel", pd);
+
+	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
+	if (rc)
+		return ERR_PTR(rc);
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	if (attrs->srq)
+		return ERR_PTR(-EINVAL);
+
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+		 get_qedr_cq(attrs->send_cq),
+		 get_qedr_cq(attrs->send_cq)->icid,
+		 get_qedr_cq(attrs->recv_cq),
+		 get_qedr_cq(attrs->recv_cq)->icid);
+
+	qedr_set_qp_init_params(dev, qp, pd, attrs);
+
+	memset(&in_params, 0, sizeof(in_params));
+
+	if (udata) {
+		if (!(udata && ibpd->uobject && ibpd->uobject->context))
+			goto err0;
+
+		ib_ctx = ibpd->uobject->context;
+		ctx = get_qedr_ucontext(ib_ctx);
+
+		memset(&ureq, 0, sizeof(ureq));
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+			DP_ERR(dev,
+			       "create qp: problem copying data from user space\n");
+			goto err0;
+		}
+
+		rc = qedr_init_user_qp(ib_ctx, dev, qp, &ureq);
+		if (rc)
+			goto err0;
+
+		qedr_init_qp_user_params(&in_params, &ureq);
+	} else {
+		rc = qedr_init_kernel_qp(dev, qp, attrs, &in_params);
+		if (rc)
+			goto err0;
+	}
+
+	qedr_init_qp_in_params_sq(dev, pd, qp, attrs, udata, &in_params);
+	qedr_init_qp_in_params_rq(qp, attrs, udata, &in_params);
+
+	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+					      &in_params, &out_params);
+
+	if (!qp->qed_qp)
+		goto err1;
+
+	qp->qp_id = out_params.qp_id;
+	qp->icid = out_params.icid;
+	qp->ibqp.qp_num = qp->qp_id;
+
+	if (udata) {
+		rc = qedr_copy_qp_uresp(dev, qp, udata);
+		if (rc)
+			goto err2;
+
+		qedr_qp_user_print(dev, qp);
+	} else {
+		qedr_init_qp_kernel_doorbell_sq(dev, qp);
+		qedr_init_qp_kernel_doorbell_rq(dev, qp);
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "created %s space QP %p\n",
+		 udata ? "user" : "kernel", qp);
+
+	return &qp->ibqp;
+
+err2:
+	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+	if (rc)
+		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
+err1:
+	if (udata) {
+		qedr_cleanup_user_sq(dev, qp);
+		qedr_cleanup_user_rq(dev, qp);
+	} else {
+		qedr_cleanup_kernel_sq(dev, qp);
+		qedr_cleanup_kernel_rq(dev, qp);
+	}
+
+err0:
+	kfree(qp);
+
+	return ERR_PTR(-EFAULT);
+}
+
+enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
+{
+	switch (qp_state) {
+	case QED_ROCE_QP_STATE_RESET:
+		return IB_QPS_RESET;
+	case QED_ROCE_QP_STATE_INIT:
+		return IB_QPS_INIT;
+	case QED_ROCE_QP_STATE_RTR:
+		return IB_QPS_RTR;
+	case QED_ROCE_QP_STATE_RTS:
+		return IB_QPS_RTS;
+	case QED_ROCE_QP_STATE_SQD:
+		return IB_QPS_SQD;
+	case QED_ROCE_QP_STATE_ERR:
+		return IB_QPS_ERR;
+	case QED_ROCE_QP_STATE_SQE:
+		return IB_QPS_SQE;
+	}
+	return IB_QPS_ERR;
+}
+
+enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state)
+{
+	switch (qp_state) {
+	case IB_QPS_RESET:
+		return QED_ROCE_QP_STATE_RESET;
+	case IB_QPS_INIT:
+		return QED_ROCE_QP_STATE_INIT;
+	case IB_QPS_RTR:
+		return QED_ROCE_QP_STATE_RTR;
+	case IB_QPS_RTS:
+		return QED_ROCE_QP_STATE_RTS;
+	case IB_QPS_SQD:
+		return QED_ROCE_QP_STATE_SQD;
+	case IB_QPS_ERR:
+		return QED_ROCE_QP_STATE_ERR;
+	default:
+		return QED_ROCE_QP_STATE_ERR;
+	}
+}
+
+static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
+{
+	qed_chain_reset(&qph->pbl);
+	qph->prod = 0;
+	qph->cons = 0;
+	qph->wqe_cons = 0;
+	qph->db_data.data.value = cpu_to_le16(0);
+}
+
+static int qedr_update_qp_state(struct qedr_dev *dev,
+				struct qedr_qp *qp,
+				enum qed_roce_qp_state new_state)
+{
+	int status = 0;
+
+	if (new_state == qp->state)
+		return 1;
+
+	switch (qp->state) {
+	case QED_ROCE_QP_STATE_RESET:
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_INIT:
+			qp->prev_wqe_size = 0;
+			qedr_reset_qp_hwq_info(&qp->sq);
+			qedr_reset_qp_hwq_info(&qp->rq);
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		};
+		break;
+	case QED_ROCE_QP_STATE_INIT:
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_RTR:
+			/* Update doorbell (in case post_recv was
+			 * done before move to RTR)
+			 */
+			wmb();
+			writel(qp->rq.db_data.raw, qp->rq.db);
+			/* Make sure write takes effect */
+			mmiowb();
+			break;
+		case QED_ROCE_QP_STATE_ERR:
+			break;
+		default:
+			/* Invalid state change. */
+			status = -EINVAL;
+			break;
+		};
+		break;
+	case QED_ROCE_QP_STATE_RTR:
+		/* RTR->XXX */
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_RTS:
+			break;
+		case QED_ROCE_QP_STATE_ERR:
+			break;
+		default:
+			/* Invalid state change. */
+			status = -EINVAL;
+			break;
+		};
+		break;
+	case QED_ROCE_QP_STATE_RTS:
+		/* RTS->XXX */
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_SQD:
+			break;
+		case QED_ROCE_QP_STATE_ERR:
+			break;
+		default:
+			/* Invalid state change. */
+			status = -EINVAL;
+			break;
+		};
+		break;
+	case QED_ROCE_QP_STATE_SQD:
+		/* SQD->XXX */
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_RTS:
+		case QED_ROCE_QP_STATE_ERR:
+			break;
+		default:
+			/* Invalid state change. */
+			status = -EINVAL;
+			break;
+		};
+		break;
+	case QED_ROCE_QP_STATE_ERR:
+		/* ERR->XXX */
+		switch (new_state) {
+		case QED_ROCE_QP_STATE_RESET:
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		};
+		break;
+	default:
+		status = -EINVAL;
+		break;
+	};
+
+	return status;
+}
+
+int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		   int attr_mask, struct ib_udata *udata)
+{
+	struct qedr_qp *qp = get_qedr_qp(ibqp);
+	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
+	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+	enum ib_qp_state old_qp_state, new_qp_state;
+	int rc = 0;
+
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
+		 attr->qp_state);
+
+	old_qp_state = qedr_get_ibqp_state(qp->state);
+	if (attr_mask & IB_QP_STATE)
+		new_qp_state = attr->qp_state;
+	else
+		new_qp_state = old_qp_state;
+
+	if (!ib_modify_qp_is_ok
+	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
+	     IB_LINK_LAYER_ETHERNET)) {
+		DP_ERR(dev,
+		       "modify qp: invalid attribute mask=0x%x specified for\n"
+		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
+		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
+		       new_qp_state);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/* Translate the masks... */
+	if (attr_mask & IB_QP_STATE) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
+		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
+	}
+
+	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+		qp_params.sqd_async = true;
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
+		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
+			rc = -EINVAL;
+			goto err;
+		}
+
+		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		qp->qkey = attr->qkey;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
+		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
+						  IB_ACCESS_REMOTE_READ;
+		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
+						   IB_ACCESS_REMOTE_WRITE;
+		qp_params.incoming_atomic_en = attr->qp_access_flags &
+					       IB_ACCESS_REMOTE_ATOMIC;
+	}
+
+	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+		if (attr_mask & IB_QP_PATH_MTU) {
+			if (attr->path_mtu < IB_MTU_256 ||
+			    attr->path_mtu > IB_MTU_4096) {
+				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
+				rc = -EINVAL;
+				goto err;
+			}
+			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
+				      ib_mtu_enum_to_int(iboe_get_mtu
+							 (dev->ndev->mtu)));
+		}
+
+		if (!qp->mtu) {
+			qp->mtu =
+			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
+			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
+		}
+
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
+
+		qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
+		qp_params.flow_label = attr->ah_attr.grh.flow_label;
+		qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+
+		qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+
+		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
+		if (rc) {
+			DP_ERR(dev,
+			       "modify qp: problems with GID index %d (rc=%d)\n",
+			       attr->ah_attr.grh.sgid_index, rc);
+			return rc;
+		}
+
+		rc = qedr_get_dmac(dev, &attr->ah_attr,
+				   qp_params.remote_mac_addr);
+		if (rc)
+			return rc;
+
+		qp_params.use_local_mac = true;
+		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
+
+		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
+			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
+			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
+		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
+			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
+			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
+		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
+			 qp_params.remote_mac_addr);
+;
+
+		qp_params.mtu = qp->mtu;
+		qp_params.lb_indication = false;
+	}
+
+	if (!qp_params.mtu) {
+		/* Stay with current MTU */
+		if (qp->mtu)
+			qp_params.mtu = qp->mtu;
+		else
+			qp_params.mtu =
+			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
+	}
+
+	if (attr_mask & IB_QP_TIMEOUT) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
+
+		qp_params.ack_timeout = attr->timeout;
+		if (attr->timeout) {
+			u32 temp;
+
+			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
+			/* FW requires [msec] */
+			qp_params.ack_timeout = temp;
+		} else {
+			/* Infinite */
+			qp_params.ack_timeout = 0;
+		}
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
+		qp_params.retry_cnt = attr->retry_cnt;
+	}
+
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
+		qp_params.rnr_retry_cnt = attr->rnr_retry;
+	}
+
+	if (attr_mask & IB_QP_RQ_PSN) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
+		qp_params.rq_psn = attr->rq_psn;
+		qp->rq_psn = attr->rq_psn;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
+			rc = -EINVAL;
+			DP_ERR(dev,
+			       "unsupported max_rd_atomic=%d, supported=%d\n",
+			       attr->max_rd_atomic,
+			       dev->attr.max_qp_req_rd_atomic_resc);
+			goto err;
+		}
+
+		SET_FIELD(qp_params.modify_flags,
+			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
+		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
+		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
+		qp_params.sq_psn = attr->sq_psn;
+		qp->sq_psn = attr->sq_psn;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		if (attr->max_dest_rd_atomic >
+		    dev->attr.max_qp_resp_rd_atomic_resc) {
+			DP_ERR(dev,
+			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
+			       attr->max_dest_rd_atomic,
+			       dev->attr.max_qp_resp_rd_atomic_resc);
+
+			rc = -EINVAL;
+			goto err;
+		}
+
+		SET_FIELD(qp_params.modify_flags,
+			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
+		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
+	}
+
+	if (attr_mask & IB_QP_DEST_QPN) {
+		SET_FIELD(qp_params.modify_flags,
+			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
+
+		qp_params.dest_qp = attr->dest_qp_num;
+		qp->dest_qp_num = attr->dest_qp_num;
+	}
+
+	if (qp->qp_type != IB_QPT_GSI)
+		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
+					      qp->qed_qp, &qp_params);
+
+	if (attr_mask & IB_QP_STATE) {
+		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
+			qedr_update_qp_state(dev, qp, qp_params.new_state);
+		qp->state = qp_params.new_state;
+	}
+
+err:
+	return rc;
+}
+
+static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
+{
+	int ib_qp_acc_flags = 0;
+
+	if (params->incoming_rdma_write_en)
+		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
+	if (params->incoming_rdma_read_en)
+		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
+	if (params->incoming_atomic_en)
+		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
+	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
+	return ib_qp_acc_flags;
+}
+
+int qedr_query_qp(struct ib_qp *ibqp,
+		  struct ib_qp_attr *qp_attr,
+		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct qed_rdma_query_qp_out_params params;
+	struct qedr_qp *qp = get_qedr_qp(ibqp);
+	struct qedr_dev *dev = qp->dev;
+	int rc = 0;
+
+	memset(&params, 0, sizeof(params));
+
+	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
+	if (rc)
+		goto err;
+
+	memset(qp_attr, 0, sizeof(*qp_attr));
+	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
+	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
+	qp_attr->path_mtu = iboe_get_mtu(params.mtu);
+	qp_attr->path_mig_state = IB_MIG_MIGRATED;
+	qp_attr->rq_psn = params.rq_psn;
+	qp_attr->sq_psn = params.sq_psn;
+	qp_attr->dest_qp_num = params.dest_qp;
+
+	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
+
+	qp_attr->cap.max_send_wr = qp->sq.max_wr;
+	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
+	qp_attr->cap.max_send_sge = qp->sq.max_sges;
+	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
+	qp_init_attr->cap = qp_attr->cap;
+
+	memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
+	       sizeof(qp_attr->ah_attr.grh.dgid.raw));
+
+	qp_attr->ah_attr.grh.flow_label = params.flow_label;
+	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
+	qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
+	qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
+
+	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	qp_attr->ah_attr.port_num = 1;
+	qp_attr->ah_attr.sl = 0;
+	qp_attr->timeout = params.timeout;
+	qp_attr->rnr_retry = params.rnr_retry;
+	qp_attr->retry_cnt = params.retry_cnt;
+	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
+	qp_attr->pkey_index = params.pkey_index;
+	qp_attr->port_num = 1;
+	qp_attr->ah_attr.src_path_bits = 0;
+	qp_attr->ah_attr.static_rate = 0;
+	qp_attr->alt_pkey_index = 0;
+	qp_attr->alt_port_num = 0;
+	qp_attr->alt_timeout = 0;
+	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
+
+	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
+	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
+	qp_attr->max_rd_atomic = params.max_rd_atomic;
+	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
+		 qp_attr->cap.max_inline_data);
+
+err:
+	return rc;
+}
+
+int qedr_destroy_qp(struct ib_qp *ibqp)
+{
+	struct qedr_qp *qp = get_qedr_qp(ibqp);
+	struct qedr_dev *dev = qp->dev;
+	struct ib_qp_attr attr;
+	int attr_mask = 0;
+	int rc = 0;
+
+	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
+		 qp, qp->qp_type);
+
+	if (qp->state != (QED_ROCE_QP_STATE_RESET | QED_ROCE_QP_STATE_ERR |
+			  QED_ROCE_QP_STATE_INIT)) {
+		attr.qp_state = IB_QPS_ERR;
+		attr_mask |= IB_QP_STATE;
+
+		/* Change the QP state to ERROR */
+		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
+	}
+
+	if (qp->qp_type != IB_QPT_GSI) {
+		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+		if (rc)
+			return rc;
+	}
+
+	if (ibqp->uobject && ibqp->uobject->context) {
+		qedr_cleanup_user_sq(dev, qp);
+		qedr_cleanup_user_rq(dev, qp);
+	} else {
+		qedr_cleanup_kernel_sq(dev, qp);
+		qedr_cleanup_kernel_rq(dev, qp);
+	}
+
+	kfree(qp);
+
+	return rc;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 36c8a692f740..056d6cb31fa2 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -62,5 +62,12 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
 int qedr_destroy_cq(struct ib_cq *);
 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
+			     struct ib_udata *);
+int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+		   int attr_mask, struct ib_udata *udata);
+int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *);
+int qedr_destroy_qp(struct ib_qp *ibqp);
 
 #endif
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index b0fc5f2125e0..75c270d839c8 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -69,4 +69,38 @@ struct qedr_create_cq_uresp {
 	__u16 icid;
 };
 
+struct qedr_create_qp_ureq {
+	__u32 qp_handle_hi;
+	__u32 qp_handle_lo;
+
+	/* SQ */
+	/* user space virtual address of SQ buffer */
+	__u64 sq_addr;
+
+	/* length of SQ buffer */
+	__u64 sq_len;
+
+	/* RQ */
+	/* user space virtual address of RQ buffer */
+	__u64 rq_addr;
+
+	/* length of RQ buffer */
+	__u64 rq_len;
+};
+
+struct qedr_create_qp_uresp {
+	__u32 qp_id;
+	__u32 atomic_supported;
+
+	/* SQ */
+	__u32 sq_db_offset;
+	__u16 sq_icid;
+
+	/* RQ */
+	__u32 rq_db_offset;
+	__u16 rq_icid;
+
+	__u32 rq_db2_offset;
+};
+
 #endif /* __QEDR_USER_H__ */
-- 
cgit v1.2.3


From 71757904efadefdf5505712f675218ce59483c5d Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@intel.com>
Date: Mon, 17 Oct 2016 08:18:15 -0700
Subject: generic syscalls: kill cruft from removed pkey syscalls

pkey_set() and pkey_get() were syscalls present in older versions
of the protection keys patches.  They were fully excised from the
x86 code, but some cruft was left in the generic syscall code.  The
C++ comments were intended to help to make it more glaring to me to
fix them before actually submitting them.  That technique worked,
but later than I would have liked.

I test-compiled this for arm64.

Fixes: a60f7b69d92c0 ("generic syscalls: Wire up memory protection keys syscalls")
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Cc: linux-arch@vger.kernel.org
Cc: mgorman@techsingularity.net
Cc: linux-api@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: luto@kernel.org
Cc: akpm@linux-foundation.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h          | 3 ---
 include/uapi/asm-generic/unistd.h | 4 ----
 2 files changed, 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0d7abb8b7315..91a740f6b884 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -902,8 +902,5 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len,
 				  unsigned long prot, int pkey);
 asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
 asmlinkage long sys_pkey_free(int pkey);
-//asmlinkage long sys_pkey_get(int pkey, unsigned long flags);
-//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights,
-//			     unsigned long flags);
 
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index dbfee7e86ba6..9b1462e38b82 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -730,10 +730,6 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
 __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
 #define __NR_pkey_free 290
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
-#define __NR_pkey_get 291
-//__SYSCALL(__NR_pkey_get,      sys_pkey_get)
-#define __NR_pkey_set 292
-//__SYSCALL(__NR_pkey_set,      sys_pkey_set)
 
 #undef __NR_syscalls
 #define __NR_syscalls 291
-- 
cgit v1.2.3