From e22d8e3c69a9f432b40baaaf3f894a128fdc2222 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 12:01:11 +0100 Subject: drm/i915: Treat WC as a separate cache domain When discussing a new WC mmap, we based the interface upon the assumption that GTT was fully coherent. How naive! Commits 3b5724d702ef ("drm/i915: Wait for writes through the GTT to land before reading back") and ed4596ea992d ("drm/i915/guc: WA to address the Ringbuffer coherency issue") demonstrate that writes through the GTT are indeed delayed and may be overtaken by direct WC access. To be safe, if userspace is mixing WC mmaps with other potential GTT access (pwrite, GTT mmaps) it should use set_domain(WC). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96563 Testcase: igt/gem_pwrite/small-gtt* Testcase: igt/drv_selftest/coherency Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170412110111.26626-2-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3554495bef13..9ee06ec8a2d6 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -666,6 +666,8 @@ struct drm_i915_gem_relocation_entry { #define I915_GEM_DOMAIN_VERTEX 0x00000020 /** GTT domain - aperture and scanout */ #define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 /** @} */ struct drm_i915_gem_exec_object { -- cgit v1.2.3 From b0fd47adc6233e4c198a97e1ddb05accd31eeacb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 15 Apr 2017 10:39:02 +0100 Subject: drm/i915: Copy user requested buffers into the error state Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may use to indicate that it wants the contents of this buffer preserved in the error state (/sys/class/drm/cardN/error) following a GPU hang involving this batch. Use this at your discretion: the contents of the error state, although compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all eternity (until the error state is destroyed).
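For illustration only (not part of the patch), a minimal userspace sketch of opting a buffer into capture might look like the following; fd, bo_handle and the helper name are assumptions, error handling is omitted, and the single-entry object list doubles as the batch:

/* Hedged sketch: query for EXEC_OBJECT_CAPTURE support, then flag one
 * execobject for error-state capture. Assumes fd is an open i915 render
 * node and bo_handle a valid GEM handle.
 */
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static void submit_with_capture(int fd, uint32_t bo_handle)
{
	int has_capture = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_EXEC_CAPTURE,
		.value = &has_capture,
	};
	struct drm_i915_gem_exec_object2 obj = {
		.handle = bo_handle,
		/* preserve this BO in /sys/class/drm/cardN/error on a hang */
		.flags = EXEC_OBJECT_CAPTURE,
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)&obj,
		.buffer_count = 1,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_capture)
		return; /* kernel predates EXEC_OBJECT_CAPTURE */

	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}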
Based on an earlier patch by Ben Widawsky Testcase: igt/gem_exec_capture Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Matt Turner Acked-by: Ben Widawsky Acked-by: Matt Turner Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170415093902.22581-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +++++++++ drivers/gpu/drm/i915/i915_gem_request.c | 16 ++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 11 ++++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 40 +++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 15 ++++++++++- 7 files changed, 96 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index bd85e3826b72..cc7393e65e99 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_FENCE: + case I915_PARAM_HAS_EXEC_CAPTURE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1af4e6f5410c..ed21f0afaaec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1025,6 +1025,9 @@ struct i915_gpu_state { u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + struct drm_i915_error_object **user_bo; + long user_bo_count; + struct drm_i915_error_object *wa_ctx; struct drm_i915_error_request { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3e59c8ef27b..af1965774e7b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { + struct i915_gem_capture_list *capture; + + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (unlikely(!capture)) + return -ENOMEM; + + capture->next = req->capture_list; + capture->vma = vma; + req->capture_list = capture; + } + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 313cdff7c6dd..095cccc2e8b2 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -292,6 +292,19 @@ static void advance_ring(struct drm_i915_gem_request *request) request->ring->head = tail; } +static void free_capture_list(struct drm_i915_gem_request *request) +{ + struct i915_gem_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_gem_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -317,6 +330,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) unreserve_seqno(request->engine); advance_ring(request); + free_capture_list(request); + /* Walk through the active list, calling retire on each. 
This allows * objects to track their GPU activity and mark themselves as idle * when their *last* active request is completed (updating state @@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->global_seqno = 0; req->file_priv = NULL; req->batch = NULL; + req->capture_list = NULL; /* * Reserve space in the ring buffer for all the commands required to diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index a211c53c813f..4ccab5affd3c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -73,6 +73,11 @@ struct i915_priotree { #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) }; +struct i915_gem_capture_list { + struct i915_gem_capture_list *next; + struct i915_vma *vma; +}; + /** * Request queue structure. * @@ -167,6 +172,12 @@ struct drm_i915_gem_request { * error state dump only). */ struct i915_vma *batch; + /** Additional buffers requested by userspace to be captured upon + * a GPU hang. The vma/obj on this list are protected by their + * active reference - all objects on this list must also be + * on the active_list (of their final request). + */ + struct i915_gem_capture_list *capture_list; struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8effc59f5cb5..4b247b050dcd 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], NULL, obj); } + for (j = 0; j < ee->user_bo_count; j++) + print_error_obj(m, dev_priv->engine[i], + "user", ee->user_bo[j]); + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i]->name, @@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = container_of(error_ref, typeof(*error), ref); - int i; + long i, j; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { struct drm_i915_error_engine *ee = &error->engine[i]; + for (j = 0; j < ee->user_bo_count; j++) + i915_error_object_free(ee->user_bo[j]); + kfree(ee->user_bo); + i915_error_object_free(ee->batchbuffer); i915_error_object_free(ee->wa_batchbuffer); i915_error_object_free(ee->ringbuffer); @@ -1346,6 +1354,35 @@ static void record_context(struct drm_i915_error_context *e, e->active = ctx->active_count; } +static void request_record_user_bo(struct drm_i915_gem_request *request, + struct drm_i915_error_engine *ee) +{ + struct i915_gem_capture_list *c; + struct drm_i915_error_object **bo; + long count; + + count = 0; + for (c = request->capture_list; c; c = c->next) + count++; + + bo = NULL; + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count = 0; + for (c = request->capture_list; c; c = c->next) { + bo[count] = i915_error_object_create(request->i915, c->vma); + if (!bo[count]) + break; + count++; + } + + ee->user_bo = bo; + ee->user_bo_count = count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { @@ -1392,6 +1429,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_batchbuffer = i915_error_object_create(dev_priv, engine->scratch); + request_record_user_bo(request, ee); ee->ctx = i915_error_object_create(dev_priv, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 
9ee06ec8a2d6..f24a80d2d42e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE 44 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user-specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + typedef struct drm_i915_getparam { __s32 param; /* @@ -775,8 +781,15 @@ struct drm_i915_gem_exec_object2 { * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. */ #define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) __u64 flags; union { -- cgit v1.2.3 From c2c446ad29437bb92b157423c632286608ebd3ec Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Fri, 19 May 2017 16:50:17 -0400 Subject: drm: Add DRM_MODE_ROTATE_ and DRM_MODE_REFLECT_ to UAPI Add DRM_MODE_ROTATE_ and DRM_MODE_REFLECT_ defines to the UAPI as a convenience. Ideally the DRM_ROTATE_ and DRM_REFLECT_ property ids would be looked up through the atomic API, but realistically userspace is likely to take shortcuts and assume that the enum values are what is sent over the wire. As a result these defines are provided purely as a convenience to userspace applications.
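As a hedged illustration of that convenience (again not part of the patch), a userspace compositor might combine the defines when setting the "rotation" property through the atomic API; plane_id and rotation_prop_id are assumed to have been discovered via the preferred name->prop id lookup:

/* Hedged sketch: rotate a plane 90 degrees counter clockwise and flip it
 * horizontally. Exactly one DRM_MODE_ROTATE_ bit may be set, optionally
 * OR'ed with DRM_MODE_REFLECT_ bits.
 */
#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

static int set_plane_rotation(int fd, uint32_t plane_id,
			      uint32_t rotation_prop_id)
{
	drmModeAtomicReq *req = drmModeAtomicAlloc();
	int ret;

	if (!req)
		return -1;

	drmModeAtomicAddProperty(req, plane_id, rotation_prop_id,
				 DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X);
	ret = drmModeAtomicCommit(fd, req, 0, NULL);
	drmModeAtomicFree(req);
	return ret;
}

The defines mirror the enum values the kernel exposes for the property, which is exactly the shortcut the paragraph above anticipates.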
Changes since v3: - Switched away from past tense in comments - Add define name change to previously mis-spelled DRM_REFLECT_X comment - Improved the comment for the DRM_MODE_REFLECT_ comment Changes since v2: - Changed define prefix from DRM_MODE_PROP_ to DRM_MODE_ - Fix compilation errors - Changed comment formatting - Deduplicated comment lines - Clarified DRM_MODE_PROP_REFLECT_ comment Changes since v1: - Moved defines from drm.h to drm_mode.h - Changed define prefix from DRM_ to DRM_MODE_PROP_ - Updated uses of the defines to the new prefix - Removed include from drm_rect.c - Stopped using the BIT() macro Signed-off-by: Robert Foss Reviewed-by: Emil Velikov Reviewed-by: Sinclair Yeh Acked-by: Liviu Dudau Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170519205017.23307-2-robert.foss@collabora.com --- drivers/gpu/drm/arm/malidp_drv.h | 2 +- drivers/gpu/drm/arm/malidp_planes.c | 18 ++++----- drivers/gpu/drm/armada/armada_overlay.c | 2 +- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 20 +++++----- drivers/gpu/drm/drm_atomic.c | 2 +- drivers/gpu/drm/drm_atomic_helper.c | 2 +- drivers/gpu/drm/drm_blend.c | 45 +++++++++++----------- drivers/gpu/drm/drm_fb_helper.c | 4 +- drivers/gpu/drm/drm_plane_helper.c | 2 +- drivers/gpu/drm/drm_rect.c | 36 +++++++++--------- drivers/gpu/drm/i915/i915_debugfs.c | 14 +++---- drivers/gpu/drm/i915/intel_atomic_plane.c | 6 +-- drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++------------- drivers/gpu/drm/i915/intel_fbc.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 20 +++++----- drivers/gpu/drm/imx/ipuv3-plane.c | 2 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 30 +++++++-------- drivers/gpu/drm/nouveau/nv50_display.c | 2 +- drivers/gpu/drm/omapdrm/omap_drv.c | 4 +- drivers/gpu/drm/omapdrm/omap_fb.c | 18 ++++----- drivers/gpu/drm/omapdrm/omap_plane.c | 16 ++++---- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 +- include/drm/drm_blend.h | 21 +---------- include/uapi/drm/drm_mode.h | 49 +++++++++++++++++++++++- 25 files changed, 202 insertions(+), 171 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h index 040311ffcaec..2e2033140efc 100644 --- a/drivers/gpu/drm/arm/malidp_drv.h +++ b/drivers/gpu/drm/arm/malidp_drv.h @@ -65,6 +65,6 @@ void malidp_de_planes_destroy(struct drm_device *drm); int malidp_crtc_init(struct drm_device *drm); /* often used combination of rotational bits */ -#define MALIDP_ROTATED_MASK (DRM_ROTATE_90 | DRM_ROTATE_270) +#define MALIDP_ROTATED_MASK (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270) #endif /* __MALIDP_DRV_H__ */ diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 814fda23cead..063a8d2b0be3 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -80,7 +80,7 @@ static void malidp_plane_reset(struct drm_plane *plane) state = kzalloc(sizeof(*state), GFP_KERNEL); if (state) { state->base.plane = plane; - state->base.rotation = DRM_ROTATE_0; + state->base.rotation = DRM_MODE_ROTATE_0; plane->state = &state->base; } } @@ -221,7 +221,7 @@ static int malidp_de_plane_check(struct drm_plane *plane, return ret; /* packed RGB888 / BGR888 can't be rotated or flipped */ - if (state->rotation != DRM_ROTATE_0 && + if (state->rotation != DRM_MODE_ROTATE_0 && (fb->format->format == DRM_FORMAT_RGB888 || fb->format->format == DRM_FORMAT_BGR888)) return -EINVAL; @@ -315,12 +315,12 @@ static void malidp_de_plane_update(struct 
drm_plane *plane, val &= ~LAYER_ROT_MASK; /* setup the rotation and axis flip bits */ - if (plane->state->rotation & DRM_ROTATE_MASK) - val |= ilog2(plane->state->rotation & DRM_ROTATE_MASK) << + if (plane->state->rotation & DRM_MODE_ROTATE_MASK) + val |= ilog2(plane->state->rotation & DRM_MODE_ROTATE_MASK) << LAYER_ROT_OFFSET; - if (plane->state->rotation & DRM_REFLECT_X) + if (plane->state->rotation & DRM_MODE_REFLECT_X) val |= LAYER_H_FLIP; - if (plane->state->rotation & DRM_REFLECT_Y) + if (plane->state->rotation & DRM_MODE_REFLECT_Y) val |= LAYER_V_FLIP; /* @@ -370,8 +370,8 @@ int malidp_de_planes_init(struct drm_device *drm) struct malidp_plane *plane = NULL; enum drm_plane_type plane_type; unsigned long crtcs = 1 << drm->mode_config.num_crtc; - unsigned long flags = DRM_ROTATE_0 | DRM_ROTATE_90 | DRM_ROTATE_180 | - DRM_ROTATE_270 | DRM_REFLECT_X | DRM_REFLECT_Y; + unsigned long flags = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | + DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; u32 *formats; int ret, i, j, n; @@ -420,7 +420,7 @@ int malidp_de_planes_init(struct drm_device *drm) continue; } - drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); + drm_plane_create_rotation_property(&plane->base, DRM_MODE_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, plane->layer->base + MALIDP_LAYER_COMPOSE); } diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 424e465ff407..e9a29df4b443 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -125,7 +125,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, src_x, src_y, src_w, src_h); ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, 0, INT_MAX, true, false, &visible); if (ret) return ret; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 29cc10d053eb..1124200bb280 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -678,8 +678,8 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, if (!state->bpp[i]) return -EINVAL; - switch (state->base.rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_90: + switch (state->base.rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_90: offset = ((y_offset + state->src_y + patched_src_w - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x) / xdiv) * @@ -688,7 +688,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, fb->pitches[i]; state->pstride[i] = -fb->pitches[i] - state->bpp[i]; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: offset = ((y_offset + state->src_y + patched_src_h - 1) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_w - 1) / @@ -697,7 +697,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, state->bpp[i]) - fb->pitches[i]; state->pstride[i] = -2 * state->bpp[i]; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; offset += ((x_offset + state->src_x + patched_src_h - 1) / @@ -707,7 +707,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p, (2 * state->bpp[i]); state->pstride[i] = fb->pitches[i] - state->bpp[i]; break; - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: default: offset = ((y_offset + state->src_y) / ydiv) * fb->pitches[i]; @@ -864,11 
+864,11 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, int ret; ret = drm_plane_create_rotation_property(&plane->base, - DRM_ROTATE_0, - DRM_ROTATE_0 | - DRM_ROTATE_90 | - DRM_ROTATE_180 | - DRM_ROTATE_270); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | + DRM_MODE_ROTATE_270); if (ret) return ret; } diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index cdec19a86af3..e1637011e18a 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -781,7 +781,7 @@ int drm_atomic_plane_set_property(struct drm_plane *plane, } else if (property == config->prop_src_h) { state->src_h = val; } else if (property == plane->rotation_property) { - if (!is_power_of_2(val & DRM_ROTATE_MASK)) + if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) return -EINVAL; state->rotation = val; } else if (property == plane->zpos_property) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 6426339427a4..636e561486a8 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3225,7 +3225,7 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane) if (plane->state) { plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index dee67ef6c670..db6aeec50b82 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -119,17 +119,17 @@ * drm_property_create_bitmask()) called "rotation" and has the following * bitmask enumaration values: * - * DRM_ROTATE_0: + * DRM_MODE_ROTATE_0: * "rotate-0" - * DRM_ROTATE_90: + * DRM_MODE_ROTATE_90: * "rotate-90" - * DRM_ROTATE_180: + * DRM_MODE_ROTATE_180: * "rotate-180" - * DRM_ROTATE_270: + * DRM_MODE_ROTATE_270: * "rotate-270" - * DRM_REFLECT_X: + * DRM_MODE_REFLECT_X: * "reflect-x" - * DRM_REFLECT_Y: + * DRM_MODE_REFLECT_Y: * "reflect-y" * * Rotation is the specified amount in degrees in counter clockwise direction, @@ -142,17 +142,17 @@ int drm_plane_create_rotation_property(struct drm_plane *plane, unsigned int supported_rotations) { static const struct drm_prop_enum_list props[] = { - { __builtin_ffs(DRM_ROTATE_0) - 1, "rotate-0" }, - { __builtin_ffs(DRM_ROTATE_90) - 1, "rotate-90" }, - { __builtin_ffs(DRM_ROTATE_180) - 1, "rotate-180" }, - { __builtin_ffs(DRM_ROTATE_270) - 1, "rotate-270" }, - { __builtin_ffs(DRM_REFLECT_X) - 1, "reflect-x" }, - { __builtin_ffs(DRM_REFLECT_Y) - 1, "reflect-y" }, + { __builtin_ffs(DRM_MODE_ROTATE_0) - 1, "rotate-0" }, + { __builtin_ffs(DRM_MODE_ROTATE_90) - 1, "rotate-90" }, + { __builtin_ffs(DRM_MODE_ROTATE_180) - 1, "rotate-180" }, + { __builtin_ffs(DRM_MODE_ROTATE_270) - 1, "rotate-270" }, + { __builtin_ffs(DRM_MODE_REFLECT_X) - 1, "reflect-x" }, + { __builtin_ffs(DRM_MODE_REFLECT_Y) - 1, "reflect-y" }, }; struct drm_property *prop; - WARN_ON((supported_rotations & DRM_ROTATE_MASK) == 0); - WARN_ON(!is_power_of_2(rotation & DRM_ROTATE_MASK)); + WARN_ON((supported_rotations & DRM_MODE_ROTATE_MASK) == 0); + WARN_ON(!is_power_of_2(rotation & DRM_MODE_ROTATE_MASK)); WARN_ON(rotation & ~supported_rotations); prop = drm_property_create_bitmask(plane->dev, 0, "rotation", @@ -178,14 +178,14 @@ EXPORT_SYMBOL(drm_plane_create_rotation_property); * @supported_rotations: Supported rotations * * Attempt to simplify the rotation to a form that is supported. - * Eg. 
if the hardware supports everything except DRM_REFLECT_X + * Eg. if the hardware supports everything except DRM_MODE_REFLECT_X * one could call this function like this: * - * drm_rotation_simplify(rotation, DRM_ROTATE_0 | - * DRM_ROTATE_90 | DRM_ROTATE_180 | - * DRM_ROTATE_270 | DRM_REFLECT_Y); + * drm_rotation_simplify(rotation, DRM_MODE_ROTATE_0 | + * DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | + * DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_Y); * - * to eliminate the DRM_ROTATE_X flag. Depending on what kind of + * to eliminate the DRM_MODE_ROTATE_X flag. Depending on what kind of * transforms the hardware supports, this function may not * be able to produce a supported transform, so the caller should * check the result afterwards. @@ -194,9 +194,10 @@ unsigned int drm_rotation_simplify(unsigned int rotation, unsigned int supported_rotations) { if (rotation & ~supported_rotations) { - rotation ^= DRM_REFLECT_X | DRM_REFLECT_Y; - rotation = (rotation & DRM_REFLECT_MASK) | - BIT((ffs(rotation & DRM_ROTATE_MASK) + 1) % 4); + rotation ^= DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y; + rotation = (rotation & DRM_MODE_REFLECT_MASK) | + BIT((ffs(rotation & DRM_MODE_ROTATE_MASK) + 1) + % 4); } return rotation; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 1f178b878e42..574af01d3ce9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -378,7 +378,7 @@ retry: goto fail; } - plane_state->rotation = DRM_ROTATE_0; + plane_state->rotation = DRM_MODE_ROTATE_0; plane->old_fb = plane->fb; plane_mask |= 1 << drm_plane_index(plane); @@ -431,7 +431,7 @@ static int restore_fbdev_mode_legacy(struct drm_fb_helper *fb_helper) if (plane->rotation_property) drm_mode_plane_set_obj_prop(plane, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } for (i = 0; i < fb_helper->crtc_count; i++) { diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 2c27f6f5a668..06aee1741e96 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -336,7 +336,7 @@ int drm_primary_helper_update(struct drm_plane *plane, struct drm_crtc *crtc, ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, false, false, &visible); diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index bc5575960ebc..9817c1445ba9 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -310,38 +310,38 @@ void drm_rect_rotate(struct drm_rect *r, { struct drm_rect tmp; - if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { + if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; - if (rotation & DRM_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & DRM_REFLECT_Y) { + if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } } - switch (rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_0: + switch (rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; r->y1 = width - tmp.x2; r->y2 = width - tmp.x1; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = height - tmp.y2; r->x2 = 
height - tmp.y1; @@ -373,8 +373,8 @@ EXPORT_SYMBOL(drm_rect_rotate); * them when doing a rotation and its inverse. * That is, if you do :: * - * drm_rotate(&r, width, height, rotation); - * drm_rotate_inv(&r, width, height, rotation); + * drm_rect_rotate(&r, width, height, rotation); + * drm_rect_rotate_inv(&r, width, height, rotation); * * you will always get back the original rectangle. */ @@ -384,24 +384,24 @@ void drm_rect_rotate_inv(struct drm_rect *r, { struct drm_rect tmp; - switch (rotation & DRM_ROTATE_MASK) { - case DRM_ROTATE_0: + switch (rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: tmp = *r; r->x1 = width - tmp.y2; r->x2 = width - tmp.y1; r->y1 = tmp.x1; r->y2 = tmp.x2; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: tmp = *r; r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: tmp = *r; r->x1 = tmp.y1; r->x2 = tmp.y2; @@ -412,15 +412,15 @@ void drm_rect_rotate_inv(struct drm_rect *r, break; } - if (rotation & (DRM_REFLECT_X | DRM_REFLECT_Y)) { + if (rotation & (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y)) { tmp = *r; - if (rotation & DRM_REFLECT_X) { + if (rotation & DRM_MODE_REFLECT_X) { r->x1 = width - tmp.x2; r->x2 = width - tmp.x1; } - if (rotation & DRM_REFLECT_Y) { + if (rotation & DRM_MODE_REFLECT_Y) { r->y1 = height - tmp.y2; r->y2 = height - tmp.y1; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 07f87985ea0b..1c66108f4333 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3095,17 +3095,17 @@ static const char *plane_rotation(unsigned int rotation) { static char buf[48]; /* - * According to doc only one DRM_ROTATE_ is allowed but this + * According to doc only one DRM_MODE_ROTATE_ is allowed but this * will print them all to visualize if the values are misused */ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s(0x%08x)", - (rotation & DRM_ROTATE_0) ? "0 " : "", - (rotation & DRM_ROTATE_90) ? "90 " : "", - (rotation & DRM_ROTATE_180) ? "180 " : "", - (rotation & DRM_ROTATE_270) ? "270 " : "", - (rotation & DRM_REFLECT_X) ? "FLIPX " : "", - (rotation & DRM_REFLECT_Y) ? "FLIPY " : "", + (rotation & DRM_MODE_ROTATE_0) ? "0 " : "", + (rotation & DRM_MODE_ROTATE_90) ? "90 " : "", + (rotation & DRM_MODE_ROTATE_180) ? "180 " : "", + (rotation & DRM_MODE_ROTATE_270) ? "270 " : "", + (rotation & DRM_MODE_REFLECT_X) ? "FLIPX " : "", + (rotation & DRM_MODE_REFLECT_Y) ?
"FLIPY " : "", rotation); return buf; diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index cfb47293fd53..a40c82c65450 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -55,7 +55,7 @@ intel_create_plane_state(struct drm_plane *plane) return NULL; state->base.plane = plane; - state->base.rotation = DRM_ROTATE_0; + state->base.rotation = DRM_MODE_ROTATE_0; state->ckey.flags = I915_SET_COLORKEY_NONE; return state; @@ -178,8 +178,8 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, /* CHV ignores the mirror bit when the rotate bit is set :( */ if (IS_CHERRYVIEW(dev_priv) && - state->rotation & DRM_ROTATE_180 && - state->rotation & DRM_REFLECT_X) { + state->rotation & DRM_MODE_ROTATE_180 && + state->rotation & DRM_MODE_REFLECT_X) { DRM_DEBUG_KMS("Cannot rotate and reflect at the same time\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2f2bb623cf5f..6a037b856d96 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2468,7 +2468,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, offset = _intel_compute_tile_offset(dev_priv, &x, &y, fb, i, fb->pitches[i], - DRM_ROTATE_0, tile_size); + DRM_MODE_ROTATE_0, tile_size); offset /= tile_size; if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { @@ -2503,7 +2503,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, drm_rect_rotate(&r, rot_info->plane[i].width * tile_width, rot_info->plane[i].height * tile_height, - DRM_ROTATE_270); + DRM_MODE_ROTATE_270); x = r.x1; y = r.y1; @@ -2939,7 +2939,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) if (drm_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, fb->width << 16, fb->height << 16, - DRM_ROTATE_270); + DRM_MODE_ROTATE_270); /* * Handle the AUX surface first since @@ -3017,10 +3017,10 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, fb->modifier == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) dspcntr |= DISPPLANE_ROTATE_180; - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) dspcntr |= DISPPLANE_MIRROR; return dspcntr; @@ -3048,10 +3048,10 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) int src_w = drm_rect_width(&plane_state->base.src) >> 16; int src_h = drm_rect_height(&plane_state->base.src) >> 16; - if (rotation & DRM_ROTATE_180) { + if (rotation & DRM_MODE_ROTATE_180) { src_x += src_w - 1; src_y += src_h - 1; - } else if (rotation & DRM_REFLECT_X) { + } else if (rotation & DRM_MODE_REFLECT_X) { src_x += src_w - 1; } } @@ -3271,17 +3271,17 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) static u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: break; /* - * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr + * DRM_MODE_ROTATE_ is counter clockwise to stay compatible with Xrandr * while i915 HW rotation is clockwise, thats why this swapping. 
*/ - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: return PLANE_CTL_ROTATE_270; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: return PLANE_CTL_ROTATE_180; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: return PLANE_CTL_ROTATE_90; default: MISSING_CASE(rotation); @@ -4671,7 +4671,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode; return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, DRM_ROTATE_0, + &state->scaler_state.scaler_id, DRM_MODE_ROTATE_0, state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } @@ -9239,7 +9239,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, return 0; } - if (plane_state->base.rotation & DRM_ROTATE_180) + if (plane_state->base.rotation & DRM_MODE_ROTATE_180) cntl |= CURSOR_ROTATE_180; return cntl; @@ -9300,7 +9300,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev_priv) && - plane_state->base.rotation & DRM_ROTATE_180) { + plane_state->base.rotation & DRM_MODE_ROTATE_180) { base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * 4; } @@ -13607,22 +13607,22 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_90 | - DRM_ROTATE_180 | DRM_ROTATE_270; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180 | - DRM_REFLECT_X; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X; } else if (INTEL_GEN(dev_priv) >= 4) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180; } else { - supported_rotations = DRM_ROTATE_0; + supported_rotations = DRM_MODE_ROTATE_0; } if (INTEL_GEN(dev_priv) >= 4) drm_plane_create_rotation_property(&primary->base, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, supported_rotations); drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs); @@ -13777,9 +13777,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) if (INTEL_GEN(dev_priv) >= 4) drm_plane_create_rotation_property(&cursor->base, - DRM_ROTATE_0, - DRM_ROTATE_0 | - DRM_ROTATE_180); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180); if (INTEL_GEN(dev_priv) >= 9) state->scaler_id = -1; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index ded2add18b26..db7f8f0a1f36 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -801,7 +801,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } if (INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv) && - cache->plane.rotation != DRM_ROTATE_0) { + cache->plane.rotation != DRM_MODE_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 332254a8eebe..03347c6ae599 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -211,7 +211,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * This also validates that any existing fb inherited from the * BIOS is suitable for own access. 
*/ - vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_unlock; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 8c87c717c7cd..191e14ddde0c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -398,10 +398,10 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) sprctl |= SP_ROTATE_180; - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) sprctl |= SP_MIRROR; if (key->flags & I915_SET_COLORKEY_SOURCE) @@ -533,7 +533,7 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) sprctl |= SPRITE_ROTATE_180; if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -674,7 +674,7 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, if (fb->modifier == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; - if (rotation & DRM_ROTATE_180) + if (rotation & DRM_MODE_ROTATE_180) dvscntr |= DVS_ROTATE_180; if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -1145,15 +1145,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_90 | - DRM_ROTATE_180 | DRM_ROTATE_270; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180 | - DRM_REFLECT_X; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X; } else { supported_rotations = - DRM_ROTATE_0 | DRM_ROTATE_180; + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180; } intel_plane->pipe = pipe; @@ -1180,7 +1180,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, goto fail; drm_plane_create_rotation_property(&intel_plane->base, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, supported_rotations); drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs); diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index d63e853a0300..49546222c6d3 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -273,7 +273,7 @@ void ipu_plane_state_reset(struct drm_plane *plane) if (ipu_state) { ipu_state->base.plane = plane; - ipu_state->base.rotation = DRM_ROTATE_0; + ipu_state->base.rotation = DRM_MODE_ROTATE_0; } plane->state = &ipu_state->base; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index a38c5fe6cc19..5e7d9af4cba8 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -67,11 +67,11 @@ static void mdp5_plane_install_rotation_property(struct drm_device *dev, struct drm_plane *plane) { drm_plane_create_rotation_property(plane, - DRM_ROTATE_0, - DRM_ROTATE_0 | - DRM_ROTATE_180 | - DRM_REFLECT_X | - DRM_REFLECT_Y); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); } /* helper to install properties which are common to planes and crtcs */ @@ -369,14 +369,14 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, caps |= 
MDP_PIPE_CAP_SCALE; rotation = drm_rotation_simplify(state->rotation, - DRM_ROTATE_0 | - DRM_REFLECT_X | - DRM_REFLECT_Y); + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); - if (rotation & DRM_REFLECT_X) + if (rotation & DRM_MODE_REFLECT_X) caps |= MDP_PIPE_CAP_HFLIP; - if (rotation & DRM_REFLECT_Y) + if (rotation & DRM_MODE_REFLECT_Y) caps |= MDP_PIPE_CAP_VFLIP; if (plane->type == DRM_PLANE_TYPE_CURSOR) @@ -970,11 +970,11 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, DBG("scale config = %x", config); rotation = drm_rotation_simplify(pstate->rotation, - DRM_ROTATE_0 | - DRM_REFLECT_X | - DRM_REFLECT_Y); - hflip = !!(rotation & DRM_REFLECT_X); - vflip = !!(rotation & DRM_REFLECT_Y); + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + hflip = !!(rotation & DRM_MODE_REFLECT_X); + vflip = !!(rotation & DRM_MODE_REFLECT_Y); spin_lock_irqsave(&mdp5_plane->pipe_lock, flags); diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index a7663249b3ba..9303daa79aba 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1033,7 +1033,7 @@ nv50_wndw_reset(struct drm_plane *plane) plane->funcs->atomic_destroy_state(plane, plane->state); plane->state = &asyw->state; plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } static void diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index e1f47f0b3ccf..663e930a7b0f 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -577,7 +577,7 @@ static void dev_lastclose(struct drm_device *dev) drm_object_property_set_value(&crtc->base, crtc->primary->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } for (i = 0; i < priv->num_planes; i++) { @@ -588,7 +588,7 @@ static void dev_lastclose(struct drm_device *dev) drm_object_property_set_value(&plane->base, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } if (priv->fbdev) { diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 29dc677dd4d3..5ca0537bb427 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -167,30 +167,30 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, uint32_t w = win->src_w; uint32_t h = win->src_h; - switch (win->rotation & DRM_ROTATE_MASK) { + switch (win->rotation & DRM_MODE_ROTATE_MASK) { default: dev_err(fb->dev->dev, "invalid rotation: %02x", (uint32_t)win->rotation); /* fallthru to default to no rotation */ case 0: - case DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: orient = 0; break; - case DRM_ROTATE_90: + case DRM_MODE_ROTATE_90: orient = MASK_XY_FLIP | MASK_X_INVERT; break; - case DRM_ROTATE_180: + case DRM_MODE_ROTATE_180: orient = MASK_X_INVERT | MASK_Y_INVERT; break; - case DRM_ROTATE_270: + case DRM_MODE_ROTATE_270: orient = MASK_XY_FLIP | MASK_Y_INVERT; break; } - if (win->rotation & DRM_REFLECT_X) + if (win->rotation & DRM_MODE_REFLECT_X) orient ^= MASK_X_INVERT; - if (win->rotation & DRM_REFLECT_Y) + if (win->rotation & DRM_MODE_REFLECT_Y) orient ^= MASK_Y_INVERT; /* adjust x,y offset for flip/invert: */ @@ -205,9 +205,9 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, info->rotation_type = OMAP_DSS_ROT_TILER; info->screen_width = omap_gem_tiled_stride(plane->bo, orient); } else { - switch (win->rotation & DRM_ROTATE_MASK) { + switch (win->rotation & DRM_MODE_ROTATE_MASK) { case 0: - case 
DRM_ROTATE_0: + case DRM_MODE_ROTATE_0: /* OK */ break; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 9168154d749e..d3d6818c68f8 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -141,7 +141,7 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, struct omap_plane_state *omap_state = to_omap_plane_state(plane->state); struct omap_plane *omap_plane = to_omap_plane(plane); - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; @@ -177,7 +177,7 @@ static int omap_plane_atomic_check(struct drm_plane *plane, if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) return -EINVAL; - if (state->rotation != DRM_ROTATE_0 && + if (state->rotation != DRM_MODE_ROTATE_0 && !omap_framebuffer_supports_rotation(state->fb)) return -EINVAL; @@ -213,15 +213,15 @@ void omap_plane_install_properties(struct drm_plane *plane, if (priv->has_dmm) { if (!plane->rotation_property) drm_plane_create_rotation_property(plane, - DRM_ROTATE_0, - DRM_ROTATE_0 | DRM_ROTATE_90 | - DRM_ROTATE_180 | DRM_ROTATE_270 | - DRM_REFLECT_X | DRM_REFLECT_Y); + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y); /* Attach the rotation property also to the crtc object */ if (plane->rotation_property && obj != &plane->base) drm_object_attach_property(obj, plane->rotation_property, - DRM_ROTATE_0); + DRM_MODE_ROTATE_0); } drm_object_attach_property(obj, priv->zorder_prop, 0); @@ -273,7 +273,7 @@ static void omap_plane_reset(struct drm_plane *plane) */ omap_state->zorder = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : omap_plane->id; - omap_state->base.rotation = DRM_ROTATE_0; + omap_state->base.rotation = DRM_MODE_ROTATE_0; plane->state = &omap_state->base; plane->state->plane = plane; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index ef9f3a2a4030..a8876b070168 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -566,7 +566,7 @@ int vmw_du_primary_plane_atomic_check(struct drm_plane *plane, ret = drm_plane_helper_check_update(plane, state->crtc, new_fb, &src, &dest, &clip, - DRM_ROTATE_0, + DRM_MODE_ROTATE_0, DRM_PLANE_HELPER_NO_SCALING, DRM_PLANE_HELPER_NO_SCALING, false, true, &visible); @@ -845,7 +845,7 @@ void vmw_du_plane_reset(struct drm_plane *plane) plane->state = &vps->base; plane->state->plane = plane; - plane->state->rotation = DRM_ROTATE_0; + plane->state->rotation = DRM_MODE_ROTATE_0; } diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index bc9e596be4c2..17606026590b 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -25,32 +25,15 @@ #include #include +#include struct drm_device; struct drm_atomic_state; struct drm_plane; -/* - * Rotation property bits. DRM_ROTATE_ rotates the image by the - * specified amount in degrees in counter clockwise direction. DRM_REFLECT_X and - * DRM_REFLECT_Y reflects the image along the specified axis prior to rotation - * - * WARNING: These defines are UABI since they're exposed in the rotation - * property. 
- */ -#define DRM_ROTATE_0 BIT(0) -#define DRM_ROTATE_90 BIT(1) -#define DRM_ROTATE_180 BIT(2) -#define DRM_ROTATE_270 BIT(3) -#define DRM_ROTATE_MASK (DRM_ROTATE_0 | DRM_ROTATE_90 | \ - DRM_ROTATE_180 | DRM_ROTATE_270) -#define DRM_REFLECT_X BIT(4) -#define DRM_REFLECT_Y BIT(5) -#define DRM_REFLECT_MASK (DRM_REFLECT_X | DRM_REFLECT_Y) - static inline bool drm_rotation_90_or_270(unsigned int rotation) { - return rotation & (DRM_ROTATE_90 | DRM_ROTATE_270); + return rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270); } int drm_plane_create_rotation_property(struct drm_plane *plane, diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 8c67fc03d53d..403339f98a92 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -75,7 +75,7 @@ extern "C" { * (define not exposed to user space). */ #define DRM_MODE_FLAG_3D_MASK (0x1f<<14) -#define DRM_MODE_FLAG_3D_NONE (0<<14) +#define DRM_MODE_FLAG_3D_NONE (0<<14) #define DRM_MODE_FLAG_3D_FRAME_PACKING (1<<14) #define DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE (2<<14) #define DRM_MODE_FLAG_3D_LINE_ALTERNATIVE (3<<14) @@ -127,6 +127,53 @@ extern "C" { #define DRM_MODE_LINK_STATUS_GOOD 0 #define DRM_MODE_LINK_STATUS_BAD 1 +/* + * DRM_MODE_ROTATE_<degrees> + * + * Signals that a drm plane has been rotated <degrees> degrees in counter + * clockwise direction. + * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_ROTATE_0 (1<<0) +#define DRM_MODE_ROTATE_90 (1<<1) +#define DRM_MODE_ROTATE_180 (1<<2) +#define DRM_MODE_ROTATE_270 (1<<3) + +/* + * DRM_MODE_ROTATE_MASK + * + * Bitmask used to look for drm plane rotations. + */ +#define DRM_MODE_ROTATE_MASK (\ + DRM_MODE_ROTATE_0 | \ + DRM_MODE_ROTATE_90 | \ + DRM_MODE_ROTATE_180 | \ + DRM_MODE_ROTATE_270) + +/* + * DRM_MODE_REFLECT_<axis> + * + * Signals that the contents of a drm plane are reflected along the <axis> + * axis, in the same way as mirroring. + * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_REFLECT_X (1<<4) +#define DRM_MODE_REFLECT_Y (1<<5) + +/* + * DRM_MODE_REFLECT_MASK + * + * Bitmask used to look for drm plane reflections. + */ +#define DRM_MODE_REFLECT_MASK (\ + DRM_MODE_REFLECT_X | \ + DRM_MODE_REFLECT_Y) + + struct drm_mode_modeinfo { __u32 clock; __u16 hdisplay; -- cgit v1.2.3 From cfbcacf42803a690be40068325d20d74b6093c8c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 24 Apr 2017 11:09:04 +0800 Subject: drm/amdgpu: add vm ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used for reserving a vmid for shader debugging that requires a fixed vmid.
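As a hedged sketch of the intended use (the names come from the uapi additions below; at this point in the series both ops still return -EINVAL, the functional paths land in later patches), userspace would invoke the new ioctl roughly like so:

/* Hedged sketch: reserve a fixed VMID for this file descriptor.
 * Assumes fd is an open amdgpu render node.
 */
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int reserve_vmid(int fd)
{
	union drm_amdgpu_vm args = {
		.in = {
			.op = AMDGPU_VM_OP_RESERVE_VMID,
			.flags = 0,
		},
	};

	return drmIoctl(fd, DRM_IOCTL_AMDGPU_VM, &args);
}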
v2: fix warning (Alex) Signed-off-by: Chunming Zhou Reviewed-by: Junwei Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + include/uapi/drm/amdgpu_drm.h | 22 ++++++++++++++++++++++ 4 files changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index dca4be970d13..1dbe76c3c366 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -948,6 +948,7 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..e4e2bacdb230 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2322,3 +2322,19 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + case AMDGPU_VM_OP_UNRESERVE_VMID: + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index e1d951ece433..b10ce2d08685 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -239,5 +239,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); #endif diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6c249e5cfb09..56ceb3daaba5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -51,6 +51,7 @@ extern "C" { #define DRM_AMDGPU_GEM_OP 0x10 #define DRM_AMDGPU_GEM_USERPTR 0x11 #define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -65,6 +66,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -190,6 +192,26 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* vm ioctl 
*/ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + /* * This is not a reliable API and you should expect it to fail for any * number of reasons and have fallback path that do not use userptr to -- cgit v1.2.3 From 2ca8a5d2ebd12c72c8b3e5ce251a02c0cc7e18b1 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 7 Dec 2016 17:31:19 +0800 Subject: drm/amdgpu: add RAVEN family id definition RAVEN is a new APU. Signed-off-by: Chunming Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +++++++-- drivers/gpu/drm/amd/include/amd_shared.h | 1 + include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d1385eba6f43..d0a26fff53f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -79,6 +79,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS11", "POLARIS12", "VEGA10", + "RAVEN", "LAST", }; @@ -1537,8 +1538,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return r; break; #endif - case CHIP_VEGA10: - adev->family = AMDGPU_FAMILY_AI; + case CHIP_VEGA10: + case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) + adev->family = AMDGPU_FAMILY_RV; + else + adev->family = AMDGPU_FAMILY_AI; r = soc15_set_ip_blocks(adev); if (r) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1d1ac1ef94f7..40486c7621d9 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -48,6 +48,7 @@ enum amd_asic_type { CHIP_POLARIS11, CHIP_POLARIS12, CHIP_VEGA10, + CHIP_RAVEN, CHIP_LAST, }; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 56ceb3daaba5..0735f47e6a5b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -860,6 +860,7 @@ struct drm_amdgpu_info_vce_clock_table { #define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ #define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ #define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ #if defined(__cplusplus) } -- cgit v1.2.3 From 66e236f180f0ab6b9b9efd9e7fdbfd9de98e333d Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 7 Feb 2017 11:57:08 -0500 Subject: uapi/drm: add AMDGPU_HW_IP_VCN_DEC for decode CS Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0735f47e6a5b..6ad29c9a4b1f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -431,7 +431,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD 3 #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 -#define AMDGPU_HW_IP_NUM 6 +#define AMDGPU_HW_IP_VCN_DEC 6 +#define AMDGPU_HW_IP_NUM 7 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 -- cgit v1.2.3 From fcfc5a90628f69c08018c8ec7b3d6dca5a8a2203 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 15 Feb 2017 10:24:55 -0500 Subject: uapi/drm: add AMDGPU_HW_IP_VCN_ENC for 
encode CS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Leo Liu Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6ad29c9a4b1f..0c0c4a491038 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -432,7 +432,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_VCE 4 #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 -#define AMDGPU_HW_IP_NUM 7 +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_NUM 8 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 -- cgit v1.2.3 From 68e2c5ffeb164f069692d00659d24dbda31178bc Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 17 May 2017 20:05:08 +0200 Subject: drm/amdgpu: add an INFO query for monitoring VRAM CPU page faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: bump the DRM version Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 + include/uapi/drm/amdgpu_drm.h | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 572e508d185f..359fb0ca8209 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1529,6 +1529,7 @@ struct amdgpu_device { atomic64_t gtt_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; + atomic64_t num_vram_cpu_page_faults; atomic_t gpu_reset_counter; atomic_t vram_lost_counter; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1bbf35f02a63..39432b0802f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -66,9 +66,10 @@ * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality * - 3.15.0 - Export more gpu info for gfx9 * - 3.16.0 - Add reserved vmid support + * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 16 +#define KMS_DRIVER_MINOR 17 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b324f07f137a..cda06a1bd7d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -417,6 +417,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_NUM_EVICTIONS: ui64 = atomic64_read(&adev->num_evictions); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; + case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS: + ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); + return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: ui64 = atomic64_read(&adev->vram_usage); return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 365883d7948d..8ee69652be8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return -EINVAL; /* hurrah the memory is not visible ! */ + atomic64_inc(&adev->num_vram_cpu_page_faults); amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; for (i = 0; i < abo->placement.num_placement; i++) { diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0c0c4a491038..34128f698f5e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -603,6 +603,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_VDDNB 0x6 /* Subquery id: Query graphics voltage */ #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 +/* Number of VRAM page faults on CPU access. */ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff -- cgit v1.2.3 From d6f544f6bf419a5bc65f7f2d070db008d1119097 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 9 May 2017 01:27:11 +0300 Subject: drm: omapdrm: Remove legacy buffer synchronization support The omapdrm driver uses a custom API to synchronize with the SGX GPU. This is unusable as such in the mainline kernel as the API is only partially implemented and requires additional out-of-tree patches. Furthermore, as no SGX driver is available in the mainline kernel, the API can't be considered as a stable mainline API. Now that the driver supports synchronization through fences, remove legacy buffer synchronization support. The two userspace ioctls are turned into no-ops to avoid breaking userspace and will be removed in the future. 
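As an illustrative sketch only (this replacement path is an assumption, not part of this patch): userspace that previously bracketed CPU access with OMAP_GEM_CPU_PREP/OMAP_GEM_CPU_FINI can export the BO through PRIME and bracket CPU access with the generic dma-buf sync ioctl instead, which synchronizes against the fences the driver now attaches:

#include <linux/dma-buf.h>
#include <sys/ioctl.h>

/* Hypothetical helpers bracketing CPU access to a PRIME-exported buffer. */
static int cpu_access_begin(int dmabuf_fd)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
	};

	/* Waits for pending device access and prepares CPU caches. */
	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}

static int cpu_access_end(int dmabuf_fd)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW,
	};

	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}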
Signed-off-by: Laurent Pinchart Reviewed-by: Tomi Valkeinen Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_drv.c | 53 +-------- drivers/gpu/drm/omapdrm/omap_drv.h | 5 - drivers/gpu/drm/omapdrm/omap_gem.c | 214 ------------------------------------- include/uapi/drm/omap_drm.h | 4 +- 4 files changed, 6 insertions(+), 270 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index b4ef3025e3e3..e2e935813cc4 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -365,53 +365,6 @@ static int ioctl_gem_new(struct drm_device *dev, void *data, &args->handle); } -static int ioctl_gem_cpu_prep(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_omap_gem_cpu_prep *args = data; - struct drm_gem_object *obj; - int ret; - - VERB("%p:%p: handle=%d, op=%x", dev, file_priv, args->handle, args->op); - - obj = drm_gem_object_lookup(file_priv, args->handle); - if (!obj) - return -ENOENT; - - ret = omap_gem_op_sync(obj, args->op); - - if (!ret) - ret = omap_gem_op_start(obj, args->op); - - drm_gem_object_unreference_unlocked(obj); - - return ret; -} - -static int ioctl_gem_cpu_fini(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_omap_gem_cpu_fini *args = data; - struct drm_gem_object *obj; - int ret; - - VERB("%p:%p: handle=%d", dev, file_priv, args->handle); - - obj = drm_gem_object_lookup(file_priv, args->handle); - if (!obj) - return -ENOENT; - - /* XXX flushy, flushy */ - ret = 0; - - if (!ret) - ret = omap_gem_op_finish(obj, args->op); - - drm_gem_object_unreference_unlocked(obj); - - return ret; -} - static int ioctl_gem_info(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -440,9 +393,11 @@ static const struct drm_ioctl_desc ioctls[DRM_COMMAND_END - DRM_COMMAND_BASE] = DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(OMAP_GEM_NEW, ioctl_gem_new, DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(OMAP_GEM_CPU_PREP, ioctl_gem_cpu_prep, + /* Deprecated, to be removed. */ + DRM_IOCTL_DEF_DRV(OMAP_GEM_CPU_PREP, drm_noop, DRM_AUTH | DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(OMAP_GEM_CPU_FINI, ioctl_gem_cpu_fini, + /* Deprecated, to be removed. 
*/ + DRM_IOCTL_DEF_DRV(OMAP_GEM_CPU_FINI, drm_noop, DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(OMAP_GEM_INFO, ioctl_gem_info, DRM_AUTH | DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 54e6055ea1d3..16aa43c6fbc2 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -184,11 +184,6 @@ int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma); int omap_gem_mmap_obj(struct drm_gem_object *obj, struct vm_area_struct *vma); int omap_gem_fault(struct vm_fault *vmf); -int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op); -int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op); -int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op); -int omap_gem_op_async(struct drm_gem_object *obj, enum omap_gem_op op, - void (*fxn)(void *arg), void *arg); int omap_gem_roll(struct drm_gem_object *obj, uint32_t roll); void omap_gem_cpu_sync(struct drm_gem_object *obj, int pgoff); void omap_gem_dma_sync(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 68a75b829b71..4bb52a5f5939 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -101,19 +101,6 @@ struct omap_gem_object { * Virtual address, if mapped. */ void *vaddr; - - /** - * sync-object allocated on demand (if needed) - * - * Per-buffer sync-object for tracking pending and completed hw/dma - * read and write operations. - */ - struct { - uint32_t write_pending; - uint32_t write_complete; - uint32_t read_pending; - uint32_t read_complete; - } *sync; }; #define to_omap_bo(x) container_of(x, struct omap_gem_object, base) @@ -1070,205 +1057,6 @@ void omap_gem_describe_objects(struct list_head *list, struct seq_file *m) } #endif -/* ----------------------------------------------------------------------------- - * Buffer Synchronization - */ - -static DEFINE_SPINLOCK(sync_lock); - -struct omap_gem_sync_waiter { - struct list_head list; - struct omap_gem_object *omap_obj; - enum omap_gem_op op; - uint32_t read_target, write_target; - /* notify called w/ sync_lock held */ - void (*notify)(void *arg); - void *arg; -}; - -/* list of omap_gem_sync_waiter.. the notify fxn gets called back when - * the read and/or write target count is achieved which can call a user - * callback (ex. to kick 3d and/or 2d), wakeup blocked task (prep for - * cpu access), etc. - */ -static LIST_HEAD(waiters); - -static inline bool is_waiting(struct omap_gem_sync_waiter *waiter) -{ - struct omap_gem_object *omap_obj = waiter->omap_obj; - if ((waiter->op & OMAP_GEM_READ) && - (omap_obj->sync->write_complete < waiter->write_target)) - return true; - if ((waiter->op & OMAP_GEM_WRITE) && - (omap_obj->sync->read_complete < waiter->read_target)) - return true; - return false; -} - -/* macro for sync debug.. */ -#define SYNCDBG 0 -#define SYNC(fmt, ...) 
do { if (SYNCDBG) \ - pr_err("%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__); \ - } while (0) - - -static void sync_op_update(void) -{ - struct omap_gem_sync_waiter *waiter, *n; - list_for_each_entry_safe(waiter, n, &waiters, list) { - if (!is_waiting(waiter)) { - list_del(&waiter->list); - SYNC("notify: %p", waiter); - waiter->notify(waiter->arg); - kfree(waiter); - } - } -} - -static inline int sync_op(struct drm_gem_object *obj, - enum omap_gem_op op, bool start) -{ - struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; - - spin_lock(&sync_lock); - - if (!omap_obj->sync) { - omap_obj->sync = kzalloc(sizeof(*omap_obj->sync), GFP_ATOMIC); - if (!omap_obj->sync) { - ret = -ENOMEM; - goto unlock; - } - } - - if (start) { - if (op & OMAP_GEM_READ) - omap_obj->sync->read_pending++; - if (op & OMAP_GEM_WRITE) - omap_obj->sync->write_pending++; - } else { - if (op & OMAP_GEM_READ) - omap_obj->sync->read_complete++; - if (op & OMAP_GEM_WRITE) - omap_obj->sync->write_complete++; - sync_op_update(); - } - -unlock: - spin_unlock(&sync_lock); - - return ret; -} - -/* mark the start of read and/or write operation */ -int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op) -{ - return sync_op(obj, op, true); -} - -int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op) -{ - return sync_op(obj, op, false); -} - -static DECLARE_WAIT_QUEUE_HEAD(sync_event); - -static void sync_notify(void *arg) -{ - struct task_struct **waiter_task = arg; - *waiter_task = NULL; - wake_up_all(&sync_event); -} - -int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op) -{ - struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; - if (omap_obj->sync) { - struct task_struct *waiter_task = current; - struct omap_gem_sync_waiter *waiter = - kzalloc(sizeof(*waiter), GFP_KERNEL); - - if (!waiter) - return -ENOMEM; - - waiter->omap_obj = omap_obj; - waiter->op = op; - waiter->read_target = omap_obj->sync->read_pending; - waiter->write_target = omap_obj->sync->write_pending; - waiter->notify = sync_notify; - waiter->arg = &waiter_task; - - spin_lock(&sync_lock); - if (is_waiting(waiter)) { - SYNC("waited: %p", waiter); - list_add_tail(&waiter->list, &waiters); - spin_unlock(&sync_lock); - ret = wait_event_interruptible(sync_event, - (waiter_task == NULL)); - spin_lock(&sync_lock); - if (waiter_task) { - SYNC("interrupted: %p", waiter); - /* we were interrupted */ - list_del(&waiter->list); - waiter_task = NULL; - } else { - /* freed in sync_op_update() */ - waiter = NULL; - } - } - spin_unlock(&sync_lock); - kfree(waiter); - } - return ret; -} - -/* call fxn(arg), either synchronously or asynchronously if the op - * is currently blocked.. fxn() can be called from any context - * - * (TODO for now fxn is called back from whichever context calls - * omap_gem_op_finish().. but this could be better defined later - * if needed) - * - * TODO more code in common w/ _sync().. 
- */ -int omap_gem_op_async(struct drm_gem_object *obj, enum omap_gem_op op, - void (*fxn)(void *arg), void *arg) -{ - struct omap_gem_object *omap_obj = to_omap_bo(obj); - if (omap_obj->sync) { - struct omap_gem_sync_waiter *waiter = - kzalloc(sizeof(*waiter), GFP_ATOMIC); - - if (!waiter) - return -ENOMEM; - - waiter->omap_obj = omap_obj; - waiter->op = op; - waiter->read_target = omap_obj->sync->read_pending; - waiter->write_target = omap_obj->sync->write_pending; - waiter->notify = fxn; - waiter->arg = arg; - - spin_lock(&sync_lock); - if (is_waiting(waiter)) { - SYNC("waited: %p", waiter); - list_add_tail(&waiter->list, &waiters); - spin_unlock(&sync_lock); - return 0; - } - - spin_unlock(&sync_lock); - - kfree(waiter); - } - - /* no waiting.. */ - fxn(arg); - - return 0; -} - /* ----------------------------------------------------------------------------- * Constructor & Destructor */ @@ -1308,8 +1096,6 @@ void omap_gem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, omap_obj->sgt); } - kfree(omap_obj->sync); - drm_gem_object_release(obj); kfree(omap_obj); diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 7fb97863c945..fd5e3ea53f2b 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -106,8 +106,8 @@ struct drm_omap_gem_info { #define DRM_OMAP_GET_PARAM 0x00 #define DRM_OMAP_SET_PARAM 0x01 #define DRM_OMAP_GEM_NEW 0x03 -#define DRM_OMAP_GEM_CPU_PREP 0x04 -#define DRM_OMAP_GEM_CPU_FINI 0x05 +#define DRM_OMAP_GEM_CPU_PREP 0x04 /* Deprecated, to be removed */ +#define DRM_OMAP_GEM_CPU_FINI 0x05 /* Deprecated, to be removed */ #define DRM_OMAP_GEM_INFO 0x06 #define DRM_OMAP_NUM_IOCTLS 0x07 -- cgit v1.2.3 From daa6630a310fe2ad90ce5f7d2d196cd0353ef4fa Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 8 Jun 2017 10:37:45 +0200 Subject: openvswitch: warn about missing first netlink attribute The first netlink attribute (value 0) must always be defined as none/unspec. Because we cannot change an existing UAPI, I add a comment to point the mistake and avoid to propagate it in a new ovs API in the future. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 61b7d36dfe34..156ee4cab82e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -343,6 +343,7 @@ enum ovs_key_attr { #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) enum ovs_tunnel_key_attr { + /* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */ OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ -- cgit v1.2.3 From 297fb414d0d190ca82bf0b46fb19d7fda1598737 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 8 Jun 2017 11:18:13 +0200 Subject: ethtool.h: remind to update 802.3ad when adding new speeds Each time a new speed is added, the bonding 802.3ad isn't updated. Add a comment to remind the developer to update this driver. Signed-off-by: Nicolas Dichtel Acked-by: Andy Gospodarek Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d179d7767f51..7d4a594d5d58 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1486,8 +1486,10 @@ enum ethtool_link_mode_bit_indices { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */ -/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */ +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Update drivers/net/phy/phy.c:phy_speed_to_str() and + * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values. + */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 -- cgit v1.2.3 From a77395447b0aeab9473a066ff28fbee01130206b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:49:57 +0100 Subject: KEYS: DH: add __user annotations to keyctl_kdf_params Signed-off-by: Eric Biggers Signed-off-by: David Howells Acked-by: Stephan Mueller Signed-off-by: James Morris --- include/uapi/linux/keyctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 201c6644b237..ef16df06642a 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -70,8 +70,8 @@ struct keyctl_dh_params { }; struct keyctl_kdf_params { - char *hashname; - char *otherinfo; + char __user *hashname; + char __user *otherinfo; __u32 otherinfolen; __u32 __spare[8]; }; -- cgit v1.2.3 From e9083420bbacce27e43d418064d0d2dfb4b37aaa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 4 Apr 2017 13:26:24 +1000 Subject: drm: introduce sync objects (v4) Sync objects are a new top-level DRM object that contains a pointer to a fence. The fence can be updated through the drivers' command submission ioctls. There is also a generic wait object API modelled on the Vulkan wait API (with code modelled on some amdgpu code). These objects can be converted to an opaque fd that can be passed between processes. v2: rename reference/unreference to put/get (Chris) fix leaked reference (David Zhou) drop mutex in favour of cmpxchg (Chris) v3: cleanups from danvet, rebase on drm_fops rename check fd_flags is 0 in ioctls. v4: export find/free, change replace fence to take a syncobj, in order to support lookup-first, replace-later semantics, which seem cleaner in the end.
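For illustration (a hedged sketch, not part of the patch; error handling is elided and the DRM fd is assumed to be an already-open render node), the new ioctls compose as follows:

#include <sys/ioctl.h>
#include <drm/drm.h>	/* structs and ioctl numbers added by this patch */

/* Sketch: create a syncobj, export it to an opaque fd, re-import it. */
static int syncobj_roundtrip(int drm_fd)
{
	struct drm_syncobj_create create = { .flags = 0 };
	struct drm_syncobj_handle export = { 0 };
	struct drm_syncobj_handle import = { 0 };

	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
		return -1;

	export.handle = create.handle;
	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &export))
		return -1;

	/* A receiving process would turn the fd back into a local handle. */
	import.fd = export.fd;
	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &import))
		return -1;

	return import.handle;
}

The fd has no use other than this round trip; every other operation takes the handle.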
Reviewed-by: Sean Paul Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie --- Documentation/gpu/drm-internals.rst | 3 + Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/drm_file.c | 8 + drivers/gpu/drm/drm_internal.h | 13 ++ drivers/gpu/drm/drm_ioctl.c | 12 ++ drivers/gpu/drm/drm_syncobj.c | 382 ++++++++++++++++++++++++++++++++++++ include/drm/drm_drv.h | 1 + include/drm/drm_file.h | 5 + include/drm/drm_syncobj.h | 90 +++++++++ include/uapi/drm/drm.h | 24 +++ 11 files changed, 552 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_syncobj.c create mode 100644 include/drm/drm_syncobj.h (limited to 'include/uapi') diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst index f6882ad0b3c3..0d936c67bf7d 100644 --- a/Documentation/gpu/drm-internals.rst +++ b/Documentation/gpu/drm-internals.rst @@ -98,6 +98,9 @@ DRIVER_ATOMIC implement appropriate obj->atomic_get_property() vfuncs for any modeset objects with driver specific properties. +DRIVER_SYNCOBJ + Driver support drm sync objects. + Major, Minor and Patchlevel ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 96b9c34c21e4..9412798645c1 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -484,3 +484,15 @@ DRM Cache Handling .. kernel-doc:: drivers/gpu/drm/drm_cache.c :export: + +DRM Sync Objects +=========================== + +.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_syncobj.h + :export: + +.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c + :export: diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index acc88942c2e5..0f527a763fde 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -16,7 +16,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_framebuffer.o drm_connector.o drm_blend.o \ drm_encoder.o drm_mode_object.o drm_property.o \ drm_plane.o drm_color_mgmt.o drm_print.o \ - drm_dumb_buffers.o drm_mode_config.o drm_vblank.o + drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \ + drm_syncobj.o drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o drm-$(CONFIG_DRM_VM) += drm_vm.o diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index caad93dab54b..84f3a242cc39 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -229,6 +229,9 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_open(dev, priv); + if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + drm_syncobj_open(priv); + if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_init_file_private(&priv->prime); @@ -276,6 +279,8 @@ out_close: out_prime_destroy: if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_destroy_file_private(&priv->prime); + if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + drm_syncobj_release(priv); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, priv); put_pid(priv->pid); @@ -398,6 +403,9 @@ int drm_release(struct inode *inode, struct file *filp) drm_property_destroy_user_blobs(dev, file_priv); } + if (drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + drm_syncobj_release(file_priv); + if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file_priv); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index ba3f5fb21959..5cecc974d2f9 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -143,4 +143,17 @@ static 
inline int drm_debugfs_crtc_crc_add(struct drm_crtc *crtc) { return 0; } + #endif + +/* drm_syncobj.c */ +void drm_syncobj_open(struct drm_file *file_private); +void drm_syncobj_release(struct drm_file *file_private); +int drm_syncobj_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); +int drm_syncobj_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); +int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); +int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 865e3ee4d743..f1e568176da9 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -241,6 +241,9 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ req->value |= dev->driver->prime_fd_to_handle ? DRM_PRIME_CAP_IMPORT : 0; req->value |= dev->driver->prime_handle_to_fd ? DRM_PRIME_CAP_EXPORT : 0; return 0; + case DRM_CAP_SYNCOBJ: + req->value = drm_core_check_feature(dev, DRIVER_SYNCOBJ); + return 0; } /* Other caps only work with KMS drivers */ @@ -645,6 +648,15 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_DESTROY, drm_syncobj_destroy_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, drm_syncobj_handle_to_fd_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c new file mode 100644 index 000000000000..7144825d45f2 --- /dev/null +++ b/drivers/gpu/drm/drm_syncobj.c @@ -0,0 +1,382 @@ +/* + * Copyright 2017 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * + */ + +/** + * DOC: Overview + * + * DRM synchronisation objects (syncobj) are a persistent objects, + * that contain an optional fence. The fence can be updated with a new + * fence, or be NULL. + * + * syncobj's can be export to fd's and back, these fd's are opaque and + * have no other use case, except passing the syncobj between processes. + * + * Their primary use-case is to implement Vulkan fences and semaphores. + * + * syncobj have a kref reference count, but also have an optional file. + * The file is only created once the syncobj is exported. + * The file takes a reference on the kref. + */ + +#include +#include +#include +#include + +#include "drm_internal.h" +#include + +/** + * drm_syncobj_find - lookup and reference a sync object. + * @file_private: drm file private pointer + * @handle: sync object handle to lookup. + * + * Returns a reference to the syncobj pointed to by handle or NULL. + */ +struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, + u32 handle) +{ + struct drm_syncobj *syncobj; + + spin_lock(&file_private->syncobj_table_lock); + + /* Check if we currently have a reference on the object */ + syncobj = idr_find(&file_private->syncobj_idr, handle); + if (syncobj) + drm_syncobj_get(syncobj); + + spin_unlock(&file_private->syncobj_table_lock); + + return syncobj; +} +EXPORT_SYMBOL(drm_syncobj_find); + +/** + * drm_syncobj_replace_fence - replace fence in a sync object. + * @file_private: drm file private pointer. + * @syncobj: Sync object to replace fence in + * @fence: fence to install in sync file. + * + * This replaces the fence on a sync object. + */ +void drm_syncobj_replace_fence(struct drm_file *file_private, + struct drm_syncobj *syncobj, + struct dma_fence *fence) +{ + struct dma_fence *old_fence = NULL; + + if (fence) + dma_fence_get(fence); + old_fence = xchg(&syncobj->fence, fence); + + dma_fence_put(old_fence); +} +EXPORT_SYMBOL(drm_syncobj_replace_fence); + +int drm_syncobj_fence_get(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence) +{ + struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); + int ret = 0; + + if (!syncobj) + return -ENOENT; + + *fence = dma_fence_get(syncobj->fence); + if (!*fence) { + ret = -EINVAL; + } + drm_syncobj_put(syncobj); + return ret; +} +EXPORT_SYMBOL(drm_syncobj_fence_get); + +/** + * drm_syncobj_free - free a sync object. + * @kref: kref to free. + * + * Only to be called from kref_put in drm_syncobj_put. 
+ */ +void drm_syncobj_free(struct kref *kref) +{ + struct drm_syncobj *syncobj = container_of(kref, + struct drm_syncobj, + refcount); + dma_fence_put(syncobj->fence); + kfree(syncobj); +} +EXPORT_SYMBOL(drm_syncobj_free); + +static int drm_syncobj_create(struct drm_file *file_private, + u32 *handle) +{ + int ret; + struct drm_syncobj *syncobj; + + syncobj = kzalloc(sizeof(struct drm_syncobj), GFP_KERNEL); + if (!syncobj) + return -ENOMEM; + + kref_init(&syncobj->refcount); + + idr_preload(GFP_KERNEL); + spin_lock(&file_private->syncobj_table_lock); + ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT); + spin_unlock(&file_private->syncobj_table_lock); + + idr_preload_end(); + + if (ret < 0) { + drm_syncobj_put(syncobj); + return ret; + } + + *handle = ret; + return 0; +} + +static int drm_syncobj_destroy(struct drm_file *file_private, + u32 handle) +{ + struct drm_syncobj *syncobj; + + spin_lock(&file_private->syncobj_table_lock); + syncobj = idr_remove(&file_private->syncobj_idr, handle); + spin_unlock(&file_private->syncobj_table_lock); + + if (!syncobj) + return -EINVAL; + + drm_syncobj_put(syncobj); + return 0; +} + +static int drm_syncobj_file_release(struct inode *inode, struct file *file) +{ + struct drm_syncobj *syncobj = file->private_data; + + drm_syncobj_put(syncobj); + return 0; +} + +static const struct file_operations drm_syncobj_file_fops = { + .release = drm_syncobj_file_release, +}; + +static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) +{ + struct file *file = anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + drm_syncobj_get(syncobj); + if (cmpxchg(&syncobj->file, NULL, file)) { + /* lost the race */ + fput(file); + } + + return 0; +} + +static int drm_syncobj_handle_to_fd(struct drm_file *file_private, + u32 handle, int *p_fd) +{ + struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); + int ret; + int fd; + + if (!syncobj) + return -EINVAL; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + drm_syncobj_put(syncobj); + return fd; + } + + if (!syncobj->file) { + ret = drm_syncobj_alloc_file(syncobj); + if (ret) + goto out_put_fd; + } + fd_install(fd, syncobj->file); + drm_syncobj_put(syncobj); + *p_fd = fd; + return 0; +out_put_fd: + put_unused_fd(fd); + drm_syncobj_put(syncobj); + return ret; +} + +static struct drm_syncobj *drm_syncobj_fdget(int fd) +{ + struct file *file = fget(fd); + + if (!file) + return NULL; + if (file->f_op != &drm_syncobj_file_fops) + goto err; + + return file->private_data; +err: + fput(file); + return NULL; +}; + +static int drm_syncobj_fd_to_handle(struct drm_file *file_private, + int fd, u32 *handle) +{ + struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + int ret; + + if (!syncobj) + return -EINVAL; + + /* take a reference to put in the idr */ + drm_syncobj_get(syncobj); + + idr_preload(GFP_KERNEL); + spin_lock(&file_private->syncobj_table_lock); + ret = idr_alloc(&file_private->syncobj_idr, syncobj, 1, 0, GFP_NOWAIT); + spin_unlock(&file_private->syncobj_table_lock); + idr_preload_end(); + + if (ret < 0) { + fput(syncobj->file); + return ret; + } + *handle = ret; + return 0; +} + +/** + * drm_syncobj_open - initalizes syncobj file-private structures at devnode open time + * @dev: drm_device which is being opened by userspace + * @file_private: drm file-private structure to set up + * + * Called at device open time, sets up the structure for handling refcounting + * of sync objects. 
+ */ +void +drm_syncobj_open(struct drm_file *file_private) +{ + idr_init(&file_private->syncobj_idr); + spin_lock_init(&file_private->syncobj_table_lock); +} + +static int +drm_syncobj_release_handle(int id, void *ptr, void *data) +{ + struct drm_syncobj *syncobj = ptr; + + drm_syncobj_put(syncobj); + return 0; +} + +/** + * drm_syncobj_release - release file-private sync object resources + * @dev: drm_device which is being closed by userspace + * @file_private: drm file-private structure to clean up + * + * Called at close time when the filp is going away. + * + * Releases any remaining references on objects by this filp. + */ +void +drm_syncobj_release(struct drm_file *file_private) +{ + idr_for_each(&file_private->syncobj_idr, + &drm_syncobj_release_handle, file_private); + idr_destroy(&file_private->syncobj_idr); +} + +int +drm_syncobj_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_create *args = data; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + /* no valid flags yet */ + if (args->flags) + return -EINVAL; + + return drm_syncobj_create(file_private, + &args->handle); +} + +int +drm_syncobj_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_destroy *args = data; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + /* make sure padding is empty */ + if (args->pad) + return -EINVAL; + return drm_syncobj_destroy(file_private, args->handle); +} + +int +drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_handle *args = data; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad || args->flags) + return -EINVAL; + + return drm_syncobj_handle_to_fd(file_private, args->handle, + &args->fd); +} + +int +drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_handle *args = data; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->pad || args->flags) + return -EINVAL; + + return drm_syncobj_fd_to_handle(file_private, args->fd, + &args->handle); +} diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 18f3181674e8..d855f9ae41a8 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -53,6 +53,7 @@ struct drm_mode_create_dumb; #define DRIVER_RENDER 0x8000 #define DRIVER_ATOMIC 0x10000 #define DRIVER_KMS_LEGACY_CONTEXT 0x20000 +#define DRIVER_SYNCOBJ 0x40000 /** * struct drm_driver - DRM driver structure diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index d66f7ee07fb5..0e0c868451a5 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -232,6 +232,11 @@ struct drm_file { /** @table_lock: Protects @object_idr. */ spinlock_t table_lock; + /** @syncobj_idr: Mapping of sync object handles to object pointers. */ + struct idr syncobj_idr; + /** @syncobj_table_lock: Protects @syncobj_idr. */ + spinlock_t syncobj_table_lock; + /** @filp: Pointer to the core file structure. 
*/ struct file *filp; diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h new file mode 100644 index 000000000000..2c3610a4a074 --- /dev/null +++ b/include/drm/drm_syncobj.h @@ -0,0 +1,90 @@ +/* + * Copyright © 2017 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * + */ +#ifndef __DRM_SYNCOBJ_H__ +#define __DRM_SYNCOBJ_H__ + +#include "linux/dma-fence.h" + +/** + * struct drm_syncobj - sync object. + * + * This structure defines a generic sync object which wraps a dma fence. + */ +struct drm_syncobj { + /** + * @refcount: + * + * Reference count of this object. + */ + struct kref refcount; + /** + * @fence: + * NULL or a pointer to the fence bound to this object. + */ + struct dma_fence *fence; + /** + * @file: + * a file backing for this syncobj. + */ + struct file *file; +}; + +void drm_syncobj_free(struct kref *kref); + +/** + * drm_syncobj_get - acquire a syncobj reference + * @obj: sync object + * + * This acquires additional reference to @obj. It is illegal to call this + * without already holding a reference. No locks required. + */ +static inline void +drm_syncobj_get(struct drm_syncobj *obj) +{ + kref_get(&obj->refcount); +} + +/** + * drm_syncobj_put - release a reference to a sync object. + * @obj: sync object. 
+ */ +static inline void +drm_syncobj_put(struct drm_syncobj *obj) +{ + kref_put(&obj->refcount, drm_syncobj_free); +} + +struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, + u32 handle); +void drm_syncobj_replace_fence(struct drm_file *file_private, + struct drm_syncobj *syncobj, + struct dma_fence *fence); +int drm_syncobj_fence_get(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence); +void drm_syncobj_free(struct kref *kref); + +#endif diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 42d9f64ce416..96c5c789e73d 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -648,6 +648,7 @@ struct drm_gem_open { #define DRM_CAP_ADDFB2_MODIFIERS 0x10 #define DRM_CAP_PAGE_FLIP_TARGET 0x11 #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +#define DRM_CAP_SYNCOBJ 0x13 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -697,6 +698,24 @@ struct drm_prime_handle { __s32 fd; }; +struct drm_syncobj_create { + __u32 handle; + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -815,6 +834,11 @@ extern "C" { #define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) #define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- cgit v1.2.3 From 3ee45a3b533a20ed9fcc11ddb880fc4b30d28f51 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Apr 2017 04:09:02 +0100 Subject: drm/syncobj: add sync_file interaction. (v1.2) This interface allows importing the fence from a sync_file into an existing drm sync object, or exporting the fence attached to an existing drm sync object into a new sync file object. This should only be used to interact with sync files where necessary. v1.1: fence put fixes (Chris), drop fence from ioctl names (Chris) fixup for new fence replace API. 
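A hedged userspace sketch of both directions, using the flag names introduced here (error handling elided; the ioctl numbers and struct come from the previous patch):

/* Replace the syncobj's fence with the one carried by a sync_file fd. */
static int syncobj_import_sync_file(int drm_fd, __u32 handle, int sf_fd)
{
	struct drm_syncobj_handle args = {
		.handle = handle,
		.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
		.fd = sf_fd,
	};

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
}

/* Wrap the syncobj's current fence in a new sync_file and return its fd. */
static int syncobj_export_sync_file(int drm_fd, __u32 handle)
{
	struct drm_syncobj_handle args = {
		.handle = handle,
		.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
	};

	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args))
		return -1;
	return args.fd;
}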
Reviewed-by: Sean Paul Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 75 +++++++++++++++++++++++++++++++++++++++++-- include/uapi/drm/drm.h | 2 ++ 2 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 7144825d45f2..89441bc78591 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "drm_internal.h" #include @@ -276,6 +277,59 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, return 0; } +int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, + int fd, int handle) +{ + struct dma_fence *fence = sync_file_get_fence(fd); + struct drm_syncobj *syncobj; + + if (!fence) + return -EINVAL; + + syncobj = drm_syncobj_find(file_private, handle); + if (!syncobj) { + dma_fence_put(fence); + return -ENOENT; + } + + drm_syncobj_replace_fence(file_private, syncobj, fence); + dma_fence_put(fence); + drm_syncobj_put(syncobj); + return 0; +} + +int drm_syncobj_export_sync_file(struct drm_file *file_private, + int handle, int *p_fd) +{ + int ret; + struct dma_fence *fence; + struct sync_file *sync_file; + int fd = get_unused_fd_flags(O_CLOEXEC); + + if (fd < 0) + return fd; + + ret = drm_syncobj_fence_get(file_private, handle, &fence); + if (ret) + goto err_put_fd; + + sync_file = sync_file_create(fence); + + dma_fence_put(fence); + + if (!sync_file) { + ret = -EINVAL; + goto err_put_fd; + } + + fd_install(fd, sync_file->file); + + *p_fd = fd; + return 0; +err_put_fd: + put_unused_fd(fd); + return ret; +} /** * drm_syncobj_open - initalizes syncobj file-private structures at devnode open time * @dev: drm_device which is being opened by userspace @@ -358,9 +412,17 @@ drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->pad || args->flags) + if (args->pad) return -EINVAL; + if (args->flags != 0 && + args->flags != DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE) + return -EINVAL; + + if (args->flags & DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE) + return drm_syncobj_export_sync_file(file_private, args->handle, + &args->fd); + return drm_syncobj_handle_to_fd(file_private, args->handle, &args->fd); } @@ -374,9 +436,18 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->pad || args->flags) + if (args->pad) + return -EINVAL; + + if (args->flags != 0 && + args->flags != DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE) return -EINVAL; + if (args->flags & DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE) + return drm_syncobj_import_sync_file_fence(file_private, + args->fd, + args->handle); + return drm_syncobj_fd_to_handle(file_private, args->fd, &args->handle); } diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 96c5c789e73d..101593ab10ac 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -708,6 +708,8 @@ struct drm_syncobj_destroy { __u32 pad; }; +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) struct drm_syncobj_handle { __u32 handle; __u32 flags; -- cgit v1.2.3 From 7fed555c02f754af44f8963890b5ef8e30e97391 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:22:59 +0100 Subject: drm/i915: expose _SLICE_MASK GETPARM Enables 
userspace to determine the maximum number of slices that can be enabled on the device and also to know which specific slices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW slice configuration. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ include/uapi/drm/i915_drm.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1f802de7b94b..d503612a7fc1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -374,6 +374,11 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_SLICE_MASK: + value = INTEL_INFO(dev_priv)->sseu.slice_mask; + if (!value) + return -ENODEV; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f24a80d2d42e..25695c3d9a76 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -418,6 +418,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 +/* Query the mask of slices available for this system */ +#define I915_PARAM_SLICE_MASK 46 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From f532023381df49ac00cb2d1e70df607cf534720d Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:00 +0100 Subject: drm/i915: expose _SUBSLICE_MASK GETPARM Assuming a uniform mask across all slices, this enables userspace to determine which specific subslices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW subslice configuration. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ include/uapi/drm/i915_drm.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d503612a7fc1..a534412a5551 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -379,6 +379,11 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_SUBSLICE_MASK: + value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + if (!value) + return -ENODEV; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 25695c3d9a76..464547d08173 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -421,6 +421,11 @@ typedef struct drm_i915_irq_wait { /* Query the mask of slices available for this system */ #define I915_PARAM_SLICE_MASK 46 +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system.
+ */ +#define I915_PARAM_SUBSLICE_MASK 47 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From 19f81df2859eb10e92d68991cefa39f826dea013 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:03 +0100 Subject: drm/i915/perf: Add OA unit support for Gen 8+ Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context save and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically; there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit when once the MUX configuration is complete). Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. 
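For reference, a minimal sketch of how userspace might open a periodic Gen8+ OA stream through the existing i915-perf interface (an illustrative assumption, not part of this patch; the metrics-set ID is assumed to have been read from sysfs beforehand):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int open_oa_stream(int drm_fd, __u64 metrics_set, __u64 period_exponent)
{
	/* Property list: (id, value) pairs consumed by the perf open ioctl. */
	__u64 props[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set,
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(props) / (2 * sizeof(__u64)),
		.properties_ptr = (uintptr_t)props,
	};

	/* On success this returns a stream fd; OA samples are consumed by
	 * read()ing drm_i915_perf_record_header-framed records from it. */
	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
}

As noted above, on Gen8+ a non-root caller additionally needs dev.i915.perf_stream_paranoid to be unset for this to succeed.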
v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by swithing away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on. (Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld \o/ Signed-off-by: Ben Widawsky --- drivers/gpu/drm/i915/i915_drv.h | 46 +- drivers/gpu/drm/i915/i915_perf.c | 1034 +++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 22 + drivers/gpu/drm/i915/intel_lrc.c | 2 + include/uapi/drm/i915_drm.h | 19 +- 5 files changed, 1028 insertions(+), 95 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2faad94c9ef2..eefb35a5d27d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2018,9 +2018,17 @@ struct i915_oa_ops { void (*init_oa_buffer)(struct drm_i915_private *dev_priv); /** - * @enable_metric_set: Applies any MUX configuration to set up the - * Boolean and Custom (B/C) counters that are part of the counter - * reports being sampled. May apply system constraints such as + * @select_metric_set: The auto generated code that checks whether a + * requested OA config is applicable to the system and if so sets up + * the mux, oa and flex eu register config pointers according to the + * current dev_priv->perf.oa.metrics_set. + */ + int (*select_metric_set)(struct drm_i915_private *dev_priv); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. */ int (*enable_metric_set)(struct drm_i915_private *dev_priv); @@ -2051,20 +2059,13 @@ struct i915_oa_ops { size_t *offset); /** - * @oa_buffer_check: Check for OA buffer data + update tail - * - * This is either called via fops or the poll check hrtimer (atomic - * ctx) without any locks taken. 
+ * @oa_hw_tail_read: read the OA tail pointer register * - * It's safe to read OA config state here unlocked, assuming that this - * is only called while the stream is enabled, while the global OA - * configuration can't be modified. - * - * Efficiency is more important than avoiding some false positives - * here, which will be handled gracefully - likely resulting in an - * %EAGAIN error for userspace. + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. */ - bool (*oa_buffer_check)(struct drm_i915_private *dev_priv); + u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv); }; struct intel_cdclk_state { @@ -2429,6 +2430,7 @@ struct drm_i915_private { struct { struct i915_vma *vma; u8 *vaddr; + u32 last_ctx_id; int format; int format_size; @@ -2498,6 +2500,15 @@ struct drm_i915_private { } oa_buffer; u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; @@ -2810,6 +2821,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x590E || \ INTEL_DEVID(dev_priv) == 0x5915 || \ INTEL_DEVID(dev_priv) == 0x591E) +#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \ + (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010) #define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) #define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ @@ -3554,6 +3567,9 @@ i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void i915_oa_init_reg_state(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + uint32_t *reg_state); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7e56b895fd34..a6af4d7dc4d6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,6 +196,12 @@ #include "i915_drv.h" #include "i915_oa_hsw.h" +#include "i915_oa_bdw.h" +#include "i915_oa_chv.h" +#include "i915_oa_sklgt2.h" +#include "i915_oa_sklgt3.h" +#include "i915_oa_sklgt4.h" +#include "i915_oa_bxt.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -215,7 +221,7 @@ * * Although this can be observed explicitly while copying reports to userspace * by checking for a zeroed report-id field in tail reports, we want to account - * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant + * for this earlier, as part of the oa_buffer_check to avoid lots of redundant * read() attempts. * * In effect we define a tail pointer for reading that lags the real tail @@ -237,7 +243,7 @@ * indicates that an updated tail pointer is needed. 
* * Most of the implementation details for this workaround are in - * gen7_oa_buffer_check_unlocked() and gen7_appand_oa_reports() + * oa_buffer_check_unlocked() and _append_oa_reports() * * Note for posterity: previously the driver used to define an effective tail * pointer that lagged the real pointer by a 'tail margin' measured in bytes @@ -272,6 +278,13 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0xffffffff +/* On Gen8+ automatically triggered OA reports include a 'reason' field... */ +#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_SHIFT 19 +#define OAREPORT_REASON_TIMER (1<<0) +#define OAREPORT_REASON_CTX_SWITCH (1<<3) +#define OAREPORT_REASON_CLK_RATIO (1<<5) + /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate * @@ -303,6 +316,13 @@ static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A12] = { 0, 64 }, + [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OA_FORMAT_C4_B8] = { 7, 64 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -332,8 +352,20 @@ struct perf_open_properties { int oa_period_exponent; }; +static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv) +{ + return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; +} + +static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv) +{ + u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + + return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; +} + /** - * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state + * oa_buffer_check_unlocked - check for data and update tail ptr state * @dev_priv: i915 device instance * * This is either called via fops (for blocking reads in user ctx) or the poll @@ -356,12 +388,11 @@ struct perf_open_properties { * * Returns: %true if the OA buffer contains data, else %false */ -static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) +static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; - u32 oastatus1; u32 head, hw_tail, aged_tail, aging_tail; u64 now; @@ -381,8 +412,7 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; - oastatus1 = I915_READ(GEN7_OASTATUS1); - hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -404,6 +434,7 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) if (aging_tail != INVALID_TAIL_PTR && ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC)) { + aged_idx ^= 1; dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; @@ -533,6 +564,300 @@ static int append_oa_sample(struct i915_perf_stream *stream, return 0; } +/** + * Copies all buffered OA reports into userspace read() buffer. 
+ * @stream: An i915-perf stream opened for OA metrics + * @buf: destination buffer given by userspace + * @count: the number of bytes userspace wants to read + * @offset: (inout): the current position for writing into @buf + * + * Notably any error condition resulting in a short read (-%ENOSPC or + * -%EFAULT) will be returned even though one or more records may + * have been successfully copied. In this case it's up to the caller + * to decide if the error should be squashed before returning to + * userspace. + * + * Note: reports are consumed from the head, and appended to the + * tail, so the tail chases the head?... If you think that's mad + * and back-to-front you're not alone, but this follows the + * Gen PRM naming convention. + * + * Returns: 0 on success, negative error code on failure. + */ +static int gen8_append_oa_reports(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + int report_size = dev_priv->perf.oa.oa_buffer.format_size; + u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; + u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + u32 mask = (OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + unsigned int aged_tail_idx; + u32 head, tail; + u32 taken; + int ret = 0; + + if (WARN_ON(!stream->enabled)) + return -EIO; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + head = dev_priv->perf.oa.oa_buffer.head; + aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * An invalid tail pointer here means we're still waiting for the poll + * hrtimer callback to give us a pointer + */ + if (tail == INVALID_TAIL_PTR) + return -EAGAIN; + + /* + * NB: oa_buffer.head/tail include the gtt_offset which we don't want + * while indexing relative to oa_buf_base. + */ + head -= gtt_offset; + tail -= gtt_offset; + + /* + * An out of bounds or misaligned head or tail pointer implies a driver + * bug since we validate + align the tail pointers we read from the + * hardware and we are in full control of the head pointer which should + * only be incremented by multiples of the report size (notably also + * all a power of two). + */ + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, + "Inconsistent OA buffer pointers: head = %u, tail = %u\n", + head, tail)) + return -EIO; + + + for (/* none */; + (taken = OA_TAKEN(tail, head)); + head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + u32 *report32 = (void *)report; + u32 ctx_id; + u32 reason; + + /* + * All the report sizes factor neatly into the buffer + * size so we never expect to see a report split + * between the beginning and end of the buffer. + * + * Given the initial alignment check a misalignment + * here would imply a driver bug that would result + * in an overrun. + */ + if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { + DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); + break; + } + + /* + * The reason field includes flags identifying what + * triggered this specific report (mostly timer + * triggered or e.g. due to a context switch). + * + * This field is never expected to be zero so we can + * check that the report isn't invalid before copying + * it to userspace... 
+ */ + reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & + OAREPORT_REASON_MASK); + if (reason == 0) { + if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + DRM_NOTE("Skipping spurious, invalid OA report\n"); + continue; + } + + /* + * XXX: Just keep the lower 21 bits for now since I'm not + * entirely sure if the HW touches any of the higher bits in + * this field + */ + ctx_id = report32[2] & 0x1fffff; + + /* + * Squash whatever is in the CTX_ID field if it's marked as + * invalid to be sure we avoid false-positive, single-context + * filtering below... + * + * Note: that we don't clear the valid_ctx_bit so userspace can + * understand that the ID has been squashed by the kernel. + */ + if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit)) + ctx_id = report32[2] = INVALID_CTX_ID; + + /* + * NB: For Gen 8 the OA unit no longer supports clock gating + * off for a specific context and the kernel can't securely + * stop the counters from updating as system-wide / global + * values. + * + * Automatic reports now include a context ID so reports can be + * filtered on the cpu but it's not worth trying to + * automatically subtract/hide counter progress for other + * contexts while filtering since we can't stop userspace + * issuing MI_REPORT_PERF_COUNT commands which would still + * provide a side-band view of the real values. + * + * To allow userspace (such as Mesa/GL_INTEL_performance_query) + * to normalize counters for a single filtered context then it + * needs be forwarded bookend context-switch reports so that it + * can track switches in between MI_REPORT_PERF_COUNT commands + * and can itself subtract/ignore the progress of counters + * associated with other contexts. Note that the hardware + * automatically triggers reports when switching to a new + * context which are tagged with the ID of the newly active + * context. To avoid the complexity (and likely fragility) of + * reading ahead while parsing reports to try and minimize + * forwarding redundant context switch reports (i.e. between + * other, unrelated contexts) we simply elect to forward them + * all. + * + * We don't rely solely on the reason field to identify context + * switches since it's not-uncommon for periodic samples to + * identify a switch before any 'context switch' report. + */ + if (!dev_priv->perf.oa.exclusive_stream->ctx || + dev_priv->perf.oa.specific_ctx_id == ctx_id || + (dev_priv->perf.oa.oa_buffer.last_ctx_id == + dev_priv->perf.oa.specific_ctx_id) || + reason & OAREPORT_REASON_CTX_SWITCH) { + + /* + * While filtering for a single context we avoid + * leaking the IDs of other contexts. + */ + if (dev_priv->perf.oa.exclusive_stream->ctx && + dev_priv->perf.oa.specific_ctx_id != ctx_id) { + report32[2] = INVALID_CTX_ID; + } + + ret = append_oa_sample(stream, buf, count, offset, + report); + if (ret) + break; + + dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id; + } + + /* + * The above reason field sanity check is based on + * the assumption that the OA buffer is initially + * zeroed and we reset the field after copying so the + * check is still meaningful once old reports start + * being overwritten. + */ + report32[0] = 0; + } + + if (start_offset != *offset) { + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * We removed the gtt_offset for the copy loop above, indexing + * relative to oa_buf_base so put back here... 
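Stepping back, the per-report forwarding policy implemented by the loop above condenses to a single predicate. A sketch for clarity only (the patch open-codes this inside the loop; field names as in this series):

	/* Illustrative condensation of the filtering conditions above. */
	static bool forward_report(struct drm_i915_private *dev_priv,
				   u32 ctx_id, u32 reason)
	{
		struct i915_perf_stream *stream =
			dev_priv->perf.oa.exclusive_stream;

		if (!stream->ctx)
			return true;	/* system-wide stream: forward all */
		if (ctx_id == dev_priv->perf.oa.specific_ctx_id)
			return true;	/* report is for the filtered context */
		if (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		    dev_priv->perf.oa.specific_ctx_id)
			return true;	/* bookend: just switched away */
		return reason & OAREPORT_REASON_CTX_SWITCH; /* bookend: switch */
	}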
+ */ + head += gtt_offset; + + I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); + dev_priv->perf.oa.oa_buffer.head = head; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + } + + return ret; +} + +/** + * gen8_oa_read - copy status records then buffered OA reports + * @stream: An i915-perf stream opened for OA metrics + * @buf: destination buffer given by userspace + * @count: the number of bytes userspace wants to read + * @offset: (inout): the current position for writing into @buf + * + * Checks OA unit status registers and if necessary appends corresponding + * status records for userspace (such as for a buffer full condition) and then + * initiate appending any buffered OA reports. + * + * Updates @offset according to the number of bytes successfully copied into + * the userspace buffer. + * + * NB: some data may be successfully copied to the userspace buffer + * even if an error is returned, and this is reflected in the + * updated @offset. + * + * Returns: zero on success or a negative error code + */ +static int gen8_oa_read(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + u32 oastatus; + int ret; + + if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) + return -EIO; + + oastatus = I915_READ(GEN8_OASTATUS); + + /* + * We treat OABUFFER_OVERFLOW as a significant error: + * + * Although theoretically we could handle this more gracefully + * sometimes, some Gens don't correctly suppress certain + * automatically triggered reports in this condition and so we + * have to assume that old reports are now being trampled + * over. + * + * Considering how we don't currently give userspace control + * over the OA buffer size and always configure a large 16MB + * buffer, then a buffer overflow does anyway likely indicate + * that something has gone quite badly wrong. + */ + if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { + ret = append_oa_status(stream, buf, count, offset, + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); + if (ret) + return ret; + + DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", + dev_priv->perf.oa.period_exponent); + + dev_priv->perf.oa.ops.oa_disable(dev_priv); + dev_priv->perf.oa.ops.oa_enable(dev_priv); + + /* + * Note: .oa_enable() is expected to re-init the oabuffer and + * reset GEN8_OASTATUS for us + */ + oastatus = I915_READ(GEN8_OASTATUS); + } + + if (oastatus & GEN8_OASTATUS_REPORT_LOST) { + ret = append_oa_status(stream, buf, count, offset, + DRM_I915_PERF_RECORD_OA_REPORT_LOST); + if (ret) + return ret; + I915_WRITE(GEN8_OASTATUS, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); + } + + return gen8_append_oa_reports(stream, buf, count, offset); +} + /** * Copies all buffered OA reports into userspace read() buffer. 
* @stream: An i915-perf stream opened for OA metrics @@ -732,7 +1057,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream, if (ret) return ret; - DRM_DEBUG("OA buffer overflow: force restart\n"); + DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", + dev_priv->perf.oa.period_exponent); dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); @@ -775,7 +1101,7 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) return -EIO; return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)); + oa_buffer_check_unlocked(dev_priv)); } /** @@ -832,30 +1158,38 @@ static int i915_oa_read(struct i915_perf_stream *stream, static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_ring *ring; - int ret; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; + if (i915.enable_execlists) + dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; + else { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_ring *ring; + int ret; - /* As the ID is the gtt offset of the context's vma we pin - * the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... - */ - ring = engine->context_pin(engine, stream->ctx); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR(ring)) - return PTR_ERR(ring); + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + return ret; + + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + * + * NB: implied RCS engine... + */ + ring = engine->context_pin(engine, stream->ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (IS_ERR(ring)) + return PTR_ERR(ring); - /* Explicitly track the ID (instead of calling i915_ggtt_offset() - * on the fly) considering the difference with gen8+ and - * execlists - */ - dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + + /* + * Explicitly track the ID (instead of calling + * i915_ggtt_offset() on the fly) considering the difference + * with gen8+ and execlists + */ + dev_priv->perf.oa.specific_ctx_id = + i915_ggtt_offset(stream->ctx->engine[engine->id].state); + } return 0; } @@ -870,14 +1204,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - mutex_lock(&dev_priv->drm.struct_mutex); + if (i915.enable_execlists) { + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + } else { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + mutex_lock(&dev_priv->drm.struct_mutex); - mutex_unlock(&dev_priv->drm.struct_mutex); + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + engine->context_unpin(engine, stream->ctx); + + mutex_unlock(&dev_priv->drm.struct_mutex); + } } static void @@ -901,6 +1240,12 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) BUG_ON(stream != dev_priv->perf.oa.exclusive_stream); + /* + * Unset exclusive_stream first, it might be checked while + * disabling the metric set on gen8+. 
+ */ + dev_priv->perf.oa.exclusive_stream = NULL; + dev_priv->perf.oa.ops.disable_metric_set(dev_priv); free_oa_buffer(dev_priv); @@ -911,8 +1256,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - dev_priv->perf.oa.exclusive_stream = NULL; - if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", dev_priv->perf.oa.spurious_report_rs.missed); @@ -967,6 +1310,65 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) dev_priv->perf.oa.pollin = false; } +static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) +{ + u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + I915_WRITE(GEN8_OASTATUS, 0); + I915_WRITE(GEN8_OAHEADPTR, gtt_offset); + dev_priv->perf.oa.oa_buffer.head = gtt_offset; + + I915_WRITE(GEN8_OABUFFER_UDW, 0); + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + I915_WRITE(GEN8_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT); + I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + /* + * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. + */ + dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... + */ + memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE); + + /* + * Maybe make ->pollin per-stream state if we support multiple + * concurrent streams in the future. + */ + dev_priv->perf.oa.pollin = false; +} + static int alloc_oa_buffer(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *bo; @@ -1114,6 +1516,324 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) ~GT_NOA_ENABLE)); } +/* + * NB: It must always remain pointer safe to run this even if the OA unit + * has been disabled. + * + * It's fine to put out-of-date values into these per-context registers + * in the case that the OA unit has been disabled. 
+ */ +static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, + u32 *reg_state) +{ + struct drm_i915_private *dev_priv = ctx->i915; + const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs; + int n_flex_regs = dev_priv->perf.oa.flex_regs_len; + u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + /* The MMIO offsets for Flex EU registers aren't contiguous */ + u32 flex_mmio[] = { + i915_mmio_reg_offset(EU_PERF_CNTL0), + i915_mmio_reg_offset(EU_PERF_CNTL1), + i915_mmio_reg_offset(EU_PERF_CNTL2), + i915_mmio_reg_offset(EU_PERF_CNTL3), + i915_mmio_reg_offset(EU_PERF_CNTL4), + i915_mmio_reg_offset(EU_PERF_CNTL5), + i915_mmio_reg_offset(EU_PERF_CNTL6), + }; + int i; + + reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); + reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << + GEN8_OA_TIMER_PERIOD_SHIFT) | + (dev_priv->perf.oa.periodic ? + GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { + u32 state_offset = ctx_flexeu0 + i * 2; + u32 mmio = flex_mmio[i]; + + /* + * This arbitrary default will select the 'EU FPU0 Pipeline + * Active' event. In the future it's anticipated that there + * will be an explicit 'No Event' we can select, but not yet... + */ + u32 value = 0; + int j; + + for (j = 0; j < n_flex_regs; j++) { + if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) { + value = flex_regs[j].value; + break; + } + } + + reg_state[state_offset] = mmio; + reg_state[state_offset+1] = value; + } +} + +/* + * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This + * is only used by the kernel context. + */ +static int gen8_emit_oa_config(struct drm_i915_gem_request *req) +{ + struct drm_i915_private *dev_priv = req->i915; + const struct i915_oa_reg *flex_regs = dev_priv->perf.oa.flex_regs; + int n_flex_regs = dev_priv->perf.oa.flex_regs_len; + /* The MMIO offsets for Flex EU registers aren't contiguous */ + u32 flex_mmio[] = { + i915_mmio_reg_offset(EU_PERF_CNTL0), + i915_mmio_reg_offset(EU_PERF_CNTL1), + i915_mmio_reg_offset(EU_PERF_CNTL2), + i915_mmio_reg_offset(EU_PERF_CNTL3), + i915_mmio_reg_offset(EU_PERF_CNTL4), + i915_mmio_reg_offset(EU_PERF_CNTL5), + i915_mmio_reg_offset(EU_PERF_CNTL6), + }; + u32 *cs; + int i; + + cs = intel_ring_begin(req, n_flex_regs * 2 + 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1); + + *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); + *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + + for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { + u32 mmio = flex_mmio[i]; + + /* + * This arbitrary default will select the 'EU FPU0 Pipeline + * Active' event. In the future it's anticipated that there + * will be an explicit 'No Event' we can select, but not + * yet... 
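Zooming out, the command stream assembled here has the canonical MI_LOAD_REGISTER_IMM shape. A schematic sketch (illustrative, not literal driver code):

	/*
	 * cs[0]          MI_LOAD_REGISTER_IMM(n_flex_regs + 1)
	 * cs[1], cs[2]   GEN8_OACTXCONTROL offset, timer exponent/enable value
	 * cs[3], cs[4]   EU_PERF_CNTL0 offset, configured value (or 0)
	 * ...            one (offset, value) pair per remaining flex EU register
	 * cs[last]       MI_NOOP padding for the ring emission
	 */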
+ */ + u32 value = 0; + int j; + + for (j = 0; j < n_flex_regs; j++) { + if (i915_mmio_reg_offset(flex_regs[j].addr) == mmio) { + value = flex_regs[j].value; + break; + } + } + + *cs++ = mmio; + *cs++ = value; + } + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return 0; +} + +static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct i915_gem_timeline *timeline; + struct drm_i915_gem_request *req; + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + i915_gem_retire_requests(dev_priv); + + req = i915_gem_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(req)) + return PTR_ERR(req); + + ret = gen8_emit_oa_config(req); + if (ret) { + i915_add_request(req); + return ret; + } + + /* Queue this switch after all other activity */ + list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { + struct drm_i915_gem_request *prev; + struct intel_timeline *tl; + + tl = &timeline->engine[engine->id]; + prev = i915_gem_active_raw(&tl->last_request, + &dev_priv->drm.struct_mutex); + if (prev) + i915_sw_fence_await_sw_fence_gfp(&req->submit, + &prev->submit, + GFP_KERNEL); + } + + ret = i915_switch_context(req); + i915_add_request(req); + + return ret; +} + +/* + * Manages updating the per-context aspects of the OA stream + * configuration across all contexts. + * + * The awkward consideration here is that OACTXCONTROL controls the + * exponent for periodic sampling which is primarily used for system + * wide profiling where we'd like a consistent sampling period even in + * the face of context switches. + * + * Our approach of updating the register state context (as opposed to + * say using a workaround batch buffer) ensures that the hardware + * won't automatically reload an out-of-date timer exponent even + * transiently before a WA BB could be parsed. + * + * This function needs to: + * - Ensure the currently running context's per-context OA state is + * updated + * - Ensure that all existing contexts will have the correct per-context + * OA state if they are scheduled for use. + * - Ensure any new contexts will be initialized with the correct + * per-context OA state. + * + * Note: it's only the RCS/Render context that has any OA state. + */ +static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, + bool interruptible) +{ + struct i915_gem_context *ctx; + int ret; + unsigned int wait_flags = I915_WAIT_LOCKED; + + if (interruptible) { + ret = i915_mutex_lock_interruptible(&dev_priv->drm); + if (ret) + return ret; + + wait_flags |= I915_WAIT_INTERRUPTIBLE; + } else { + mutex_lock(&dev_priv->drm.struct_mutex); + } + + /* Switch away from any user context. */ + ret = gen8_switch_to_updated_kernel_context(dev_priv); + if (ret) + goto out; + + /* + * The OA register config is setup through the context image. This image + * might be written to by the GPU on context switch (in particular on + * lite-restore). This means we can't safely update a context's image, + * if this context is scheduled/submitted to run on the GPU. + * + * We could emit the OA register config through the batch buffer but + * this might leave small interval of time where the OA unit is + * configured at an invalid sampling period. + * + * So far the best way to work around this issue seems to be draining + * the GPU from any submitted work. + */ + ret = i915_gem_wait_for_idle(dev_priv, wait_flags); + if (ret) + goto out; + + /* Update all contexts now that we've stalled the submission. 
*/ + list_for_each_entry(ctx, &dev_priv->context_list, link) { + struct intel_context *ce = &ctx->engine[RCS]; + u32 *regs; + + /* OA settings will be set upon first use */ + if (!ce->state) + continue; + + regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto out; + } + + ce->state->obj->mm.dirty = true; + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + + gen8_update_reg_state_unlocked(ctx, regs); + + i915_gem_object_unpin_map(ce->state->obj); + } + + out: + mutex_unlock(&dev_priv->drm.struct_mutex); + + return ret; +} + +static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) +{ + int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv); + int i; + + if (ret) + return ret; + + /* + * We disable slice/unslice clock ratio change reports on SKL since + * they are too noisy. The HW generates a lot of redundant reports + * where the ratio hasn't really changed causing a lot of redundant + * work to processes and increasing the chances we'll hit buffer + * overruns. + * + * Although we don't currently use the 'disable overrun' OABUFFER + * feature it's worth noting that clock ratio reports have to be + * disabled before considering to use that feature since the HW doesn't + * correctly block these reports. + * + * Currently none of the high-level metrics we have depend on knowing + * this ratio to normalize. + * + * Note: This register is not power context saved and restored, but + * that's OK considering that we disable RC6 while the OA unit is + * enabled. + * + * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to + * be read back from automatically triggered reports, as part of the + * RPT_ID field. + */ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) { + I915_WRITE(GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + } + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = gen8_configure_all_contexts(dev_priv, true); + if (ret) + return ret; + + I915_WRITE(GDT_CHICKEN_BITS, 0xA0); + for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { + config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs[i], + dev_priv->perf.oa.mux_regs_lens[i]); + } + I915_WRITE(GDT_CHICKEN_BITS, 0x80); + + config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs, + dev_priv->perf.oa.b_counter_regs_len); + + return 0; +} + +static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) +{ + /* Reset all contexts' slices/subslices configurations. */ + gen8_configure_all_contexts(dev_priv, false); +} + static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->perf.hook_lock); @@ -1158,6 +1878,31 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags); } +static void gen8_oa_enable(struct drm_i915_private *dev_priv) +{ + u32 report_format = dev_priv->perf.oa.oa_buffer.format; + + /* + * Reset buf pointers so we don't forward reports from before now. + * + * Think carefully if considering trying to avoid this, since it + * also ensures status flags and the buffer itself are cleared + * in error paths, and we have checks for invalid reports based + * on the assumption that certain fields are written to zeroed + * memory which this helps maintains. 
+ */ + gen8_init_oa_buffer(dev_priv); + + /* + * Note: we don't rely on the hardware to perform single context + * filtering and instead filter on the cpu based on the context-id + * field of reports + */ + I915_WRITE(GEN8_OACONTROL, (report_format << + GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); +} + /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -1184,6 +1929,11 @@ static void gen7_oa_disable(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OACONTROL, 0); } +static void gen8_oa_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(GEN8_OACONTROL, 0); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -1362,6 +2112,21 @@ err_oa_buf_alloc: return ret; } +void i915_oa_init_reg_state(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + u32 *reg_state) +{ + struct drm_i915_private *dev_priv = engine->i915; + + if (engine->id != RCS) + return; + + if (!dev_priv->perf.initialized) + return; + + gen8_update_reg_state_unlocked(ctx, reg_state); +} + /** * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation * @stream: An i915 perf stream @@ -1487,7 +2252,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) container_of(hrtimer, typeof(*dev_priv), perf.oa.poll_check_timer); - if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) { + if (oa_buffer_check_unlocked(dev_priv)) { dev_priv->perf.oa.pollin = true; wake_up(&dev_priv->perf.oa.poll_wq); } @@ -1776,6 +2541,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, struct i915_gem_context *specific_ctx = NULL; struct i915_perf_stream *stream = NULL; unsigned long f_flags = 0; + bool privileged_op = true; int stream_fd; int ret; @@ -1793,12 +2559,29 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, } } + /* + * On Haswell the OA unit supports clock gating off for a specific + * context and in this mode there's no visibility of metrics for the + * rest of the system, which we consider acceptable for a + * non-privileged client. + * + * For Gen8+ the OA unit no longer supports clock gating off for a + * specific context and the kernel can't securely stop the counters + * from updating as system-wide / global values. Even though we can + * filter reports based on the included context ID we can't block + * clients from seeing the raw / global counter values via + * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to + * enable the OA unit by default. + */ + if (IS_HASWELL(dev_priv) && specific_ctx) + privileged_op = false; + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters * without CAP_SYS_ADMIN privileges. 
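From the userspace side the resulting policy looks roughly like this; a hedged sketch against the i915 perf uapi, where drm_fd, ctx_id, and metrics_set are assumed to exist and error handling is elided:

	#include <stdint.h>
	#include <xf86drm.h>
	#include <drm/i915_drm.h>

	uint64_t props[] = {
		/* Dropping CTX_HANDLE requests a system-wide stream, which
		 * (with dev.i915.perf_stream_paranoid set) needs CAP_SYS_ADMIN;
		 * keeping it allows an unprivileged per-context stream on HSW.
		 */
		DRM_I915_PERF_PROP_CTX_HANDLE, ctx_id,
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set, /* id from sysfs */
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_C4_B8,
		DRM_I915_PERF_PROP_OA_EXPONENT, 16,	/* sampling period */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)props,
	};
	int stream_fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);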
*/ - if (!specific_ctx && + if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); ret = -EACCES; @@ -2070,9 +2853,6 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, */ void i915_perf_register(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; - if (!dev_priv->perf.initialized) return; @@ -2088,11 +2868,38 @@ void i915_perf_register(struct drm_i915_private *dev_priv) if (!dev_priv->perf.metrics_kobj) goto exit; - if (i915_perf_register_sysfs_hsw(dev_priv)) { - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + if (IS_HASWELL(dev_priv)) { + if (i915_perf_register_sysfs_hsw(dev_priv)) + goto sysfs_error; + } else if (IS_BROADWELL(dev_priv)) { + if (i915_perf_register_sysfs_bdw(dev_priv)) + goto sysfs_error; + } else if (IS_CHERRYVIEW(dev_priv)) { + if (i915_perf_register_sysfs_chv(dev_priv)) + goto sysfs_error; + } else if (IS_SKYLAKE(dev_priv)) { + if (IS_SKL_GT2(dev_priv)) { + if (i915_perf_register_sysfs_sklgt2(dev_priv)) + goto sysfs_error; + } else if (IS_SKL_GT3(dev_priv)) { + if (i915_perf_register_sysfs_sklgt3(dev_priv)) + goto sysfs_error; + } else if (IS_SKL_GT4(dev_priv)) { + if (i915_perf_register_sysfs_sklgt4(dev_priv)) + goto sysfs_error; + } else + goto sysfs_error; + } else if (IS_BROXTON(dev_priv)) { + if (i915_perf_register_sysfs_bxt(dev_priv)) + goto sysfs_error; } + goto exit; + +sysfs_error: + kobject_put(dev_priv->perf.metrics_kobj); + dev_priv->perf.metrics_kobj = NULL; + exit: mutex_unlock(&dev_priv->perf.lock); } @@ -2108,13 +2915,24 @@ exit: */ void i915_perf_unregister(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; - if (!dev_priv->perf.metrics_kobj) return; - i915_perf_unregister_sysfs_hsw(dev_priv); + if (IS_HASWELL(dev_priv)) + i915_perf_unregister_sysfs_hsw(dev_priv); + else if (IS_BROADWELL(dev_priv)) + i915_perf_unregister_sysfs_bdw(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + i915_perf_unregister_sysfs_chv(dev_priv); + else if (IS_SKYLAKE(dev_priv)) { + if (IS_SKL_GT2(dev_priv)) + i915_perf_unregister_sysfs_sklgt2(dev_priv); + else if (IS_SKL_GT3(dev_priv)) + i915_perf_unregister_sysfs_sklgt3(dev_priv); + else if (IS_SKL_GT4(dev_priv)) + i915_perf_unregister_sysfs_sklgt4(dev_priv); + } else if (IS_BROXTON(dev_priv)) + i915_perf_unregister_sysfs_bxt(dev_priv); kobject_put(dev_priv->perf.metrics_kobj); dev_priv->perf.metrics_kobj = NULL; @@ -2173,36 +2991,105 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - if (!IS_HASWELL(dev_priv)) - return; - - hrtimer_init(&dev_priv->perf.oa.poll_check_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; - init_waitqueue_head(&dev_priv->perf.oa.poll_wq); + dev_priv->perf.oa.n_builtin_sets = 0; + + if (IS_HASWELL(dev_priv)) { + dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; + dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; + dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; + dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; + dev_priv->perf.oa.ops.read = gen7_oa_read; + dev_priv->perf.oa.ops.oa_hw_tail_read = + gen7_oa_hw_tail_read; + + dev_priv->perf.oa.oa_formats = hsw_oa_formats; + + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_hsw; + } else if (i915.enable_execlists) { + /* Note: 
that although we could theoretically also support the + * legacy ringbuffer mode on BDW (and earlier iterations of + * this driver, before upstreaming did this) it didn't seem + * worth the complexity to maintain now that BDW+ enable + * execlist mode by default. + */ - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); - spin_lock_init(&dev_priv->perf.hook_lock); - spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + + if (IS_BROADWELL(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_bdw; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_bdw; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_chv; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_chv; + } + } else if (IS_GEN9(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + + if (IS_SKL_GT2(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt2; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt2; + } else if (IS_SKL_GT3(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt3; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt3; + } else if (IS_SKL_GT4(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_sklgt4; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_sklgt4; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->perf.oa.n_builtin_sets = + i915_oa_n_builtin_metric_sets_bxt; + dev_priv->perf.oa.ops.select_metric_set = + i915_oa_select_metric_set_bxt; + } + } - dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_buffer_check = - gen7_oa_buffer_check_unlocked; + if (dev_priv->perf.oa.n_builtin_sets) { + dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; + dev_priv->perf.oa.ops.enable_metric_set = + gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = + gen8_disable_metric_set; + dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; + dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; + dev_priv->perf.oa.ops.read = gen8_oa_read; + dev_priv->perf.oa.ops.oa_hw_tail_read = + gen8_oa_hw_tail_read; + + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; + } + } - dev_priv->perf.oa.oa_formats = hsw_oa_formats; + if (dev_priv->perf.oa.n_builtin_sets) { + hrtimer_init(&dev_priv->perf.oa.poll_check_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; + init_waitqueue_head(&dev_priv->perf.oa.poll_wq); - dev_priv->perf.oa.n_builtin_sets = - i915_oa_n_builtin_metric_sets_hsw; + INIT_LIST_HEAD(&dev_priv->perf.streams); + mutex_init(&dev_priv->perf.lock); + spin_lock_init(&dev_priv->perf.hook_lock); + spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + 
dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); - dev_priv->perf.initialized = true; + dev_priv->perf.initialized = true; + } } /** @@ -2217,5 +3104,6 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) unregister_sysctl_table(dev_priv->perf.sysctl_header); memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); + dev_priv->perf.initialized = false; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 88e4707f571d..bd535f12db18 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -656,6 +656,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OACTXID _MMIO(0x2364) +#define GEN8_OA_DEBUG _MMIO(0x2B04) +#define GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1<<5) +#define GEN9_OA_DEBUG_INCLUDE_CLK_RATIO (1<<6) +#define GEN9_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1<<2) +#define GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1<<1) + #define GEN8_OACONTROL _MMIO(0x2B00) #define GEN8_OA_REPORT_FORMAT_A12 (0<<2) #define GEN8_OA_REPORT_FORMAT_A12_B8_C8 (2<<2) @@ -677,6 +683,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN7_OABUFFER_STOP_RESUME_ENABLE (1<<1) #define GEN7_OABUFFER_RESUME (1<<0) +#define GEN8_OABUFFER_UDW _MMIO(0x23b4) #define GEN8_OABUFFER _MMIO(0x2b14) #define GEN7_OASTATUS1 _MMIO(0x2364) @@ -695,7 +702,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OASTATUS_REPORT_LOST (1<<0) #define GEN8_OAHEADPTR _MMIO(0x2B0C) +#define GEN8_OAHEADPTR_MASK 0xffffffc0 #define GEN8_OATAILPTR _MMIO(0x2B10) +#define GEN8_OATAILPTR_MASK 0xffffffc0 #define OABUFFER_SIZE_128K (0<<3) #define OABUFFER_SIZE_256K (1<<3) @@ -708,7 +717,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OA_MEM_SELECT_GGTT (1<<0) +/* + * Flexible, Aggregate EU Counter Registers. 
+ * Note: these aren't contiguous + */ #define EU_PERF_CNTL0 _MMIO(0xe458) +#define EU_PERF_CNTL1 _MMIO(0xe558) +#define EU_PERF_CNTL2 _MMIO(0xe658) +#define EU_PERF_CNTL3 _MMIO(0xe758) +#define EU_PERF_CNTL4 _MMIO(0xe45c) +#define EU_PERF_CNTL5 _MMIO(0xe55c) +#define EU_PERF_CNTL6 _MMIO(0xe65c) #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 @@ -2494,6 +2513,9 @@ enum skl_disp_power_wells { #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10) +#define GEN6_RCS_PWR_FSM _MMIO(0x22ac) +#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4) + /* Fuse readout registers for GT */ #define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d49dbaa931b5..7404cf2aac28 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1962,6 +1962,8 @@ static void execlists_init_reg_state(u32 *regs, regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, make_rpcs(dev_priv)); + + i915_oa_init_reg_state(engine, ctx, regs); } } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 464547d08173..15bc9f78ba4d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1316,13 +1316,18 @@ struct drm_i915_gem_context_param { }; enum drm_i915_oa_format { - I915_OA_FORMAT_A13 = 1, - I915_OA_FORMAT_A29, - I915_OA_FORMAT_A13_B8_C8, - I915_OA_FORMAT_B4_C8, - I915_OA_FORMAT_A45_B8_C8, - I915_OA_FORMAT_B4_C8_A16, - I915_OA_FORMAT_C4_B8, + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_MAX /* non-ABI */ }; -- cgit v1.2.3 From 98830d91da082b0285d35bdf5b5ae98decac7df6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 7 Jun 2017 17:13:35 -0700 Subject: drm/vc4: Add T-format scanout support. The T tiling format is what V3D uses for textures, with no raster support at all until later revisions of the hardware (and always at a large 3D performance penalty). If we can't scan out V3D's format, then we often need to do a relayout at some stage of the pipeline, either right before texturing from the scanout buffer (common in X11 without a compositor) or between a tiled screen buffer right before scanout (an option I've considered in trying to resolve this inconsistency, but which means needing to use the dirty fb ioctl and having some update policy). T-format scanout lets us avoid either of those shadow copies, for a massive, obvious performance improvement to X11 window dragging without a compositor. Unfortunately, enabling a compositor to work around the discrepancy has turned out to be too costly in memory consumption for the Raspbian distribution. Because the HVS operates a scanline at a time, compositing from T does increase the memory bandwidth cost of scanout. On my 1920x1080@32bpp display on a RPi3, we go from about 15% of system memory bandwidth with linear to about 20% with tiled. However, for X11 this still ends up being a huge performance win in active usage. This patch doesn't yet handle src_x/src_y offsetting within the tiled buffer. 
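Supporting that offsetting will mean translating (src_x, src_y) through the tiled layout, which is spelled out in the drm_fourcc.h comment this patch adds below. A hedged sketch of the first step, locating the 4k tile that contains pixel (x, y) at 32bpp, ignoring the subtile/utile swizzling within a tile:

	/* Illustrative only: byte offset of the 4k tile holding pixel (x, y)
	 * in a T-format image at 32bpp, assuming the width is padded to a
	 * multiple of 32 pixels (a 4k tile is 32x32 pixels at this depth).
	 */
	static unsigned int vc4_t_tile_offset(unsigned int x, unsigned int y,
					      unsigned int width)
	{
		unsigned int tiles_per_row = width / 32;
		unsigned int tile_x = x / 32;
		unsigned int tile_y = y / 32;

		/* Rows of 4k tiles snake: even rows run left-to-right,
		 * odd rows right-to-left.
		 */
		if (tile_y & 1)
			tile_x = tiles_per_row - 1 - tile_x;

		return (tile_y * tiles_per_row + tile_x) * 4096;
	}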
The driver already fails to handle that src_x/src_y offsetting for untiled buffers, in any case.

Signed-off-by: Eric Anholt
Link: http://patchwork.freedesktop.org/patch/msgid/20170608001336.12842-1-eric@anholt.net
Reviewed-by: Boris Brezillon
---
drivers/gpu/drm/vc4/vc4_plane.c | 31 +++++++++++++++++++++++++++----
drivers/gpu/drm/vc4/vc4_regs.h | 19 +++++++++++++++++++
include/uapi/drm/drm_fourcc.h | 23 ++++++++++++++++++++++-
3 files changed, 68 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index da18dec21696..fa6809d8b0fe 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -500,8 +500,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
u32 ctl0_offset = vc4_state->dlist_count;
const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
int num_planes = drm_format_num_planes(format->drm);
- u32 scl0, scl1;
- u32 lbm_size;
+ u32 scl0, scl1, pitch0;
+ u32 lbm_size, tiling;
unsigned long irqflags;
int ret, i;
@@ -542,11 +542,31 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
scl1 = vc4_get_scl_field(state, 0);
}
+ switch (fb->modifier) {
+ case DRM_FORMAT_MOD_LINEAR:
+ tiling = SCALER_CTL0_TILING_LINEAR;
+ pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
+ tiling = SCALER_CTL0_TILING_256B_OR_T;
+
+ pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET) |
+ VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L) |
+ VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5,
+ SCALER_PITCH0_TILE_WIDTH_R));
+ break;
+ default:
+ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
+ (long long)fb->modifier);
+ return -EINVAL;
+ }
+
/* Control word */
vc4_dlist_write(vc4_state,
SCALER_CTL0_VALID |
(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
(format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
+ VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
@@ -600,8 +620,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
for (i = 0; i < num_planes; i++)
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
- /* Pitch word 0/1/2 */
- for (i = 0; i < num_planes; i++) {
+ /* Pitch word 0 */
+ vc4_dlist_write(vc4_state, pitch0);
+
+ /* Pitch word 1/2 */
+ for (i = 1; i < num_planes; i++) {
vc4_dlist_write(vc4_state,
VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
}
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 932093936178..d382c34c1b9e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -709,6 +709,13 @@ enum hvs_pixel_format {
#define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24)
#define SCALER_CTL0_SIZE_SHIFT 24
+#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20)
+#define SCALER_CTL0_TILING_SHIFT 20
+#define SCALER_CTL0_TILING_LINEAR 0
+#define SCALER_CTL0_TILING_64B 1
+#define SCALER_CTL0_TILING_128B 2
+#define SCALER_CTL0_TILING_256B_OR_T 3
+
#define SCALER_CTL0_HFLIP BIT(16)
#define SCALER_CTL0_VFLIP BIT(15)
@@ -838,7 +845,19 @@ enum hvs_pixel_format {
#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0
#define SCALER_PPF_KERNEL_UNCACHED BIT(31)
+/* PITCH0/1/2 fields for raster. */
#define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0)
#define SCALER_SRC_PITCH_SHIFT 0
+/* PITCH0 fields for T-tiled.
*/ +#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16) +#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16 +#define SCALER_PITCH0_TILE_LINE_DIR BIT(15) +#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14) +/* Y offset within a tile. */ +#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7) +#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7 +#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0) +#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0 + #endif /* VC4_REGS_H */ diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 55e301047b3e..7586c46f68bf 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -182,6 +182,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 #define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 +#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 /* add more to the end as needed */ #define fourcc_mod_code(vendor, val) \ @@ -306,7 +307,6 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) - /* NVIDIA Tegra frame buffer modifiers */ /* @@ -351,6 +351,27 @@ extern "C" { */ #define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v) +/* + * Broadcom VC4 "T" format + * + * This is the primary layout that the V3D GPU can texture from (it + * can't do linear). The T format has: + * + * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4 + * pixels at 32 bit depth. + * + * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually + * 16x16 pixels). + * + * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On + * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows + * they're (TR, BR, BL, TL), where bottom left is start of memory. + * + * - an image made of 4k tiles in rows either left-to-right (even rows of 4k + * tiles) or right-to-left (odd rows of 4k tiles). + */ +#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 83753117f1de4f6ef7588fac9545065eed1e85e2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 7 Jun 2017 17:13:36 -0700 Subject: drm/vc4: Add get/set tiling ioctls. This allows mesa to set the tiling format for a BO and have that tiling format be respected by mesa on the other side of an import/export (and by vc4 scanout in the kernel), without defining a protocol to pass the tiling through userspace. Signed-off-by: Eric Anholt Link: http://patchwork.freedesktop.org/patch/msgid/20170608001336.12842-2-eric@anholt.net Acked-by: Dave Airlie --- drivers/gpu/drm/vc4/vc4_bo.c | 83 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/vc4/vc4_drv.c | 2 ++ drivers/gpu/drm/vc4/vc4_drv.h | 6 ++++ drivers/gpu/drm/vc4/vc4_kms.c | 41 ++++++++++++++++++++- include/uapi/drm/vc4_drm.h | 16 +++++++++ 5 files changed, 147 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 590c0912afc1..487f96412d35 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -343,6 +343,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); list_add(&bo->unref_head, &vc4->bo_cache.time_list); @@ -568,6 +569,88 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, return ret; } +/** + * vc4_set_tiling_ioctl() - Sets the tiling modifier for a BO. 
+ * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * The tiling state of the BO decides the default modifier of an fb if + * no specific modifier was set by userspace, and the return value of + * vc4_get_tiling_ioctl() (so that userspace can treat a BO it + * received from dmabuf as the same tiling format as the producer + * used). + */ +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_set_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + bool t_format; + + if (args->flags != 0) + return -EINVAL; + + switch (args->modifier) { + case DRM_FORMAT_MOD_NONE: + t_format = false; + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + t_format = true; + break; + default: + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + bo->t_format = t_format; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + +/** + * vc4_get_tiling_ioctl() - Gets the tiling modifier for a BO. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Returns the tiling modifier for a BO as set by vc4_set_tiling_ioctl(). + */ +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_get_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + if (args->flags != 0 || args->modifier != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + + if (bo->t_format) + args->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + else + args->modifier = DRM_FORMAT_MOD_NONE; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + void vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 136bb4213dc0..c6b487c3d2b7 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -138,6 +138,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index a5bf2e5e0b57..df22698d62ee 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -148,6 +148,8 @@ struct vc4_bo { */ uint64_t write_seqno; + bool t_format; + /* List entry for the BO's position in either * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list */ @@ -470,6 +472,10 @@ int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int 
vc4_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 928d191ef90f..202f7ebf5a7b 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -202,11 +202,50 @@ static int vc4_atomic_commit(struct drm_device *dev, return 0; } +static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_mode_fb_cmd2 mode_cmd_local; + + /* If the user didn't specify a modifier, use the + * vc4_set_tiling_ioctl() state for the BO. + */ + if (!(mode_cmd->flags & DRM_MODE_FB_MODIFIERS)) { + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, + mode_cmd->handles[0]); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", + mode_cmd->handles[0]); + return ERR_PTR(-ENOENT); + } + bo = to_vc4_bo(gem_obj); + + mode_cmd_local = *mode_cmd; + + if (bo->t_format) { + mode_cmd_local.modifier[0] = + DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + } else { + mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE; + } + + drm_gem_object_unreference_unlocked(gem_obj); + + mode_cmd = &mode_cmd_local; + } + + return drm_fb_cma_create(dev, file_priv, mode_cmd); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = vc4_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = vc4_atomic_commit, - .fb_create = drm_fb_cma_create, + .fb_create = vc4_fb_create, }; int vc4_kms_load(struct drm_device *dev) diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index f07a09016726..6ac4c5c014cb 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -38,6 +38,8 @@ extern "C" { #define DRM_VC4_CREATE_SHADER_BO 0x05 #define DRM_VC4_GET_HANG_STATE 0x06 #define DRM_VC4_GET_PARAM 0x07 +#define DRM_VC4_SET_TILING 0x08 +#define DRM_VC4_GET_TILING 0x09 #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -47,6 +49,8 @@ extern "C" { #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) +#define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling) +#define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -295,6 +299,18 @@ struct drm_vc4_get_param { __u64 value; }; +struct drm_vc4_get_tiling { + __u32 handle; + __u32 flags; + __u64 modifier; +}; + +struct drm_vc4_set_tiling { + __u32 handle; + __u32 flags; + __u64 modifier; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 167b606aa262270ab6aeb5700adca6b1f33da26a Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:59 -0600 Subject: drm/msm: Remove DRM_MSM_NUM_IOCTLS The ioctl array is sparsely populated but the compiler will make sure that it is sufficiently sized for all the values that we have so we can safely use ARRAY_SIZE() instead of having a constantly changing #define in the uapi header. 
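As a toy illustration of why that is safe (not code from this patch): with designated initializers the compiler sizes the array from the highest index used, so ARRAY_SIZE() tracks the newest ioctl automatically:

	/* Illustrative only. With sparse designated initializers the array
	 * length is max_index + 1, so no separate NUM_IOCTLS constant is
	 * needed and any gaps stay zero-initialized.
	 */
	static const int demo_table[] = {
		[0] = 10,
		[7] = 20,
	};
	_Static_assert(sizeof(demo_table) / sizeof(demo_table[0]) == 8,
		       "sized by the highest index");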
Signed-off-by: Jordan Crouse
Signed-off-by: Rob Clark
---
drivers/gpu/drm/msm/msm_drv.c | 2 +-
include/uapi/drm/msm_drm.h | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9d498eb81906..18d02ccb6c9b 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -842,7 +842,7 @@ static struct drm_driver msm_driver = {
.debugfs_init = msm_debugfs_init,
#endif
.ioctls = msm_ioctls,
- .num_ioctls = DRM_MSM_NUM_IOCTLS,
+ .num_ioctls = ARRAY_SIZE(msm_ioctls),
.fops = &fops,
.name = "msm",
.desc = "MSM Snapdragon DRM",
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a4a189a240d7..a9985fe6efcd 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -261,7 +261,6 @@ struct drm_msm_gem_madvise {
#define DRM_MSM_GEM_SUBMIT 0x06
#define DRM_MSM_WAIT_FENCE 0x07
#define DRM_MSM_GEM_MADVISE 0x08
-#define DRM_MSM_NUM_IOCTLS 0x09
#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
-- cgit v1.2.3

From 49fd08baa36ac10b13ea7b23fc6bbee8b4a6fcfe Mon Sep 17 00:00:00 2001
From: Jordan Crouse
Date: Mon, 8 May 2017 14:35:01 -0600
Subject: drm/msm: Add hint to DRM_IOCTL_MSM_GEM_INFO to return an object IOVA

Modify the 'pad' member of struct drm_msm_gem_info to 'flags'. If the user sets 'flags' to non-zero it means that they want an IOVA for the GEM object instead of an mmap() offset. Return the iova in the 'offset' member.

Signed-off-by: Jordan Crouse
[robclark: s/hint/flags in commit msg]
Signed-off-by: Rob Clark
---
drivers/gpu/drm/msm/msm_drv.c | 23 +++++++++++++++++++++--
include/uapi/drm/msm_drm.h | 8 ++++++--
2 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 18d02ccb6c9b..beb4f6b3ac70 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -699,6 +699,17 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data,
return ret;
}
+static int msm_ioctl_gem_info_iova(struct drm_device *dev,
+ struct drm_gem_object *obj, uint64_t *iova)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+
+ if (!priv->gpu)
+ return -EINVAL;
+
+ return msm_gem_get_iova(obj, priv->gpu->id, iova);
+}
+
static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -706,14 +717,22 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
struct drm_gem_object *obj;
int ret = 0;
- if (args->pad)
+ if (args->flags & ~MSM_INFO_FLAGS)
return -EINVAL;
obj = drm_gem_object_lookup(file, args->handle);
if (!obj)
return -ENOENT;
- args->offset = msm_gem_mmap_offset(obj);
+ if (args->flags & MSM_INFO_IOVA) {
+ uint64_t iova;
+
+ ret = msm_ioctl_gem_info_iova(dev, obj, &iova);
+ if (!ret)
+ args->offset = iova;
+ } else {
+ args->offset = msm_gem_mmap_offset(obj);
+ }
drm_gem_object_unreference_unlocked(obj);
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a9985fe6efcd..26c54f6d595d 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -104,10 +104,14 @@ struct drm_msm_gem_new {
__u32 handle; /* out */
};
+#define MSM_INFO_IOVA 0x01
+
+#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+
struct drm_msm_gem_info {
__u32 handle; /* in */
- __u32 pad;
- __u64 offset; /* out, offset to pass to mmap() */
+ __u32 flags; /* in - combination of MSM_INFO_* flags */
+ __u64 offset; /* out, mmap() offset or iova */
};
#define MSM_PREP_READ 0x01
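A hedged sketch of the new query from userspace (fd and bo_handle assumed; error handling elided):

	/* With MSM_INFO_IOVA set, 'offset' returns the GPU address
	 * instead of an mmap() offset.
	 */
	struct drm_msm_gem_info req = {
		.handle = bo_handle,
		.flags = MSM_INFO_IOVA,
	};
	drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req);
	uint64_t gpu_addr = req.offset;

Reusing the existing 'offset' member for the IOVA keeps the struct size and ioctl number unchanged.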
-- cgit v1.2.3

From 1a71cf2fa646799d4397a49b223549d8617fece0 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 16 Jun 2017 15:05:23 +0100
Subject: drm/i915: Allow execbuffer to use the first object as the batch

Currently, the last object in the execlist is always the batch. However, when building the batch buffer we often know the batch object first, and if we can use the first slot in the execlist we can emit relocation instructions relative to it immediately and avoid a separate pass to adjust the relocations to point to the last execlist slot.

Signed-off-by: Chris Wilson
Reviewed-by: Joonas Lahtinen
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++++-
include/uapi/drm/i915_drm.h | 19 +++++++++++++++++--
3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e33a2ed9244c..3c2af70034cf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -367,6 +367,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
case I915_PARAM_HAS_EXEC_CAPTURE:
+ case I915_PARAM_HAS_EXEC_BATCH_FIRST:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f4b02ef3987f..e262133a7cf5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -645,7 +645,10 @@ ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
{
- return eb->buffer_count - 1;
+ if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+ return 0;
+ else
+ return eb->buffer_count - 1;
}
static int eb_select_context(struct i915_execbuffer *eb)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 15bc9f78ba4d..7ccbd6a2bbe0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -418,7 +418,6 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
-/* Query the mask of slices available for this system */
#define I915_PARAM_SLICE_MASK 46
/* Assuming it's uniform for each slice, this queries the mask of subslices
@@ -426,6 +425,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_SUBSLICE_MASK 47
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer
+ * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST.
+ */
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
+
typedef struct drm_i915_getparam {
__s32 param;
/*
@@ -912,7 +917,17 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_OUT (1<<17)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
+/*
+ * Traditionally the execbuf ioctl has only considered the final element in
+ * the execobject[] to be the executable batch. Often though, the client
+ * will know the batch object prior to construction, and being able to place
+ * it into the execobject[] array first can simplify the relocation tracking.
+ * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the
+ * execobject[] as the batch instead (the default is to use the last
+ * element).
+ */
+#define I915_EXEC_BATCH_FIRST (1<<18)
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
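To make the new flag concrete, a hedged sketch of client usage (fd, batch_handle, and data_handle assumed; buffer creation, relocations, and error handling elided; this is illustrative, not part of the patch):

	struct drm_i915_gem_exec_object2 objects[2] = {
		{ .handle = batch_handle },	/* batch now leads the array */
		{ .handle = data_handle },
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)objects,
		.buffer_count = 2,	/* without the flag, [1] is the batch */
		.flags = I915_EXEC_RENDER | I915_EXEC_BATCH_FIRST,
	};
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);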
From 660e855813f78b7fe63ff1ebc4f2ca07d94add0b Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 13 Mar 2017 22:18:15 +0000
Subject: amdgpu: use drm sync objects for shared semaphores (v6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This creates a new command submission chunk for amdgpu to add in and
out sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new chunks, one
for syncobj pre submission dependencies, and one for post submission
sync obj signalling, and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD, with input
from Christian König on what things should look like.

In theory VkFences could be backed with sync objects and just get
passed into the cs as syncobj handles as well.

NOTE: this interface addition needs a version bump to expose it to
userspace.

TODO: update to dep_sync when rebasing onto amdgpu master. (with this -
r-b from Christian)

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the wait sems but
only remove them once the CS job has been submitted.
v4: rewrite names of API and base on new syncobj code.
v5: move post deps earlier, rename some APIs
v6: lookup post deps earlier, and just replace fences in post deps
stage (Christian)

Reviewed-by: Christian König
Signed-off-by: Dave Airlie
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 88 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  2 +-
 include/uapi/drm/amdgpu_drm.h           |  6 +++
 4 files changed, 97 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e0adad590ecb..9f827aca90ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1159,6 +1159,9 @@ struct amdgpu_cs_parser {
 
         /* user fence */
         struct amdgpu_bo_list_entry uf_entry;
+
+        unsigned num_post_dep_syncobjs;
+        struct drm_syncobj **post_dep_syncobjs;
 };
 
 #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 29469e6b58b8..aeee6840e82b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                         break;
 
                 case AMDGPU_CHUNK_ID_DEPENDENCIES:
+                case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+                case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                         break;
 
                 default:
@@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
                 ttm_eu_backoff_reservation(&parser->ticket,
                                            &parser->validated);
         }
+
+        for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+                drm_syncobj_put(parser->post_dep_syncobjs[i]);
+        kfree(parser->post_dep_syncobjs);
+
         dma_fence_put(parser->fence);
 
         if (parser->ctx)
@@ -971,6 +979,64 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
         return 0;
 }
 
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+                                                 uint32_t handle)
+{
+        int r;
+        struct dma_fence *fence;
+        r = drm_syncobj_fence_get(p->filp, handle, &fence);
+        if (r)
+                return r;
+
+        r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+        dma_fence_put(fence);
+
+        return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+                                            struct amdgpu_cs_chunk *chunk)
+{
+        unsigned num_deps;
+        int i, r;
+        struct drm_amdgpu_cs_chunk_sem *deps;
+
+        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+        num_deps = chunk->length_dw * 4 /
+                sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+        for (i = 0; i < num_deps; ++i) {
+                r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+                if (r)
+                        return r;
+        }
+        return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+                                             struct amdgpu_cs_chunk *chunk)
+{
+        unsigned num_deps;
+        int i;
+        struct drm_amdgpu_cs_chunk_sem *deps;
+        deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+        num_deps = chunk->length_dw * 4 /
+                sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+        p->post_dep_syncobjs = kmalloc_array(num_deps,
+                                             sizeof(struct drm_syncobj *),
+                                             GFP_KERNEL);
+        p->num_post_dep_syncobjs = 0;
+
+        for (i = 0; i < num_deps; ++i) {
+                p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+                if (!p->post_dep_syncobjs[i])
+                        return -EINVAL;
+                p->num_post_dep_syncobjs++;
+        }
+        return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                                   struct amdgpu_cs_parser *p)
 {
@@ -985,12 +1051,30 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                         r = amdgpu_cs_process_fence_dep(p, chunk);
                         if (r)
                                 return r;
+                } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+                        r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+                        if (r)
+                                return r;
+                } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+                        r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+                        if (r)
+                                return r;
                 }
         }
 
         return 0;
 }
 
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+        int i;
+
+        for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
+                drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i],
+                                          p->fence);
+        }
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                             union drm_amdgpu_cs *cs)
 {
@@ -1011,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         job->owner = p->filp;
         job->fence_ctx = entity->fence_context;
         p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+        amdgpu_cs_post_dependencies(p);
+
         cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
         job->uf_sequence = cs->out.handle;
         amdgpu_job_free_resources(job);
@@ -1018,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
         trace_amdgpu_cs_ioctl(job);
         amd_sched_entity_push_job(&job->base);
-
         return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8168f8ec711a..4c7c2628ace4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -782,7 +782,7 @@ static struct drm_driver kms_driver = {
         .driver_features =
             DRIVER_USE_AGP |
             DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-            DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
+            DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
         .load = amdgpu_driver_load_kms,
         .open = amdgpu_driver_open_kms,
         .postclose = amdgpu_driver_postclose_kms,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 34128f698f5e..d9aa4a339650 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB              0x01
 #define AMDGPU_CHUNK_ID_FENCE           0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
 
 struct drm_amdgpu_cs_chunk {
         __u32           chunk_id;
@@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence {
         __u32 offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+        __u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
         union {
                 struct drm_amdgpu_cs_chunk_ib           ib_data;
--
cgit v1.2.3
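
As a rough guide to the uapi, the sketch below fills in the two new
chunk types for a submission (illustrative only and untested; the
surrounding DRM_IOCTL_AMDGPU_CS plumbing, in which each chunk is
referenced through an array of user pointers, is elided, and the helper
name is invented):

    #include <stdint.h>
    #include <drm/amdgpu_drm.h>

    /* Describe one wait (SYNCOBJ_IN) and one signal (SYNCOBJ_OUT)
     * semaphore for a CS. The syncobj handles come from the generic
     * drm syncobj ioctls (e.g. DRM_IOCTL_SYNCOBJ_CREATE).
     */
    static unsigned fill_syncobj_chunks(struct drm_amdgpu_cs_chunk *chunk,
                                        struct drm_amdgpu_cs_chunk_sem *wait,
                                        uint32_t wait_handle,
                                        struct drm_amdgpu_cs_chunk_sem *signal,
                                        uint32_t signal_handle)
    {
        unsigned n = 0;

        wait->handle = wait_handle;
        chunk[n].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
        chunk[n].length_dw = sizeof(*wait) / 4;   /* data size in dwords */
        chunk[n].chunk_data = (uintptr_t)wait;
        n++;

        signal->handle = signal_handle;
        chunk[n].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
        chunk[n].length_dw = sizeof(*signal) / 4;
        chunk[n].chunk_data = (uintptr_t)signal;
        n++;

        return n;
    }

Since the kernel derives the number of semaphores from length_dw, an
array of drm_amdgpu_cs_chunk_sem can be passed to wait on or signal
several syncobjs with a single chunk.
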