From 24557865c8b1a6d0eaccaac47aabd9b23badf8fd Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Mon, 8 Jan 2018 14:55:37 -0500
Subject: drm: Add Content Protection property

This patch adds a new optional connector property to allow userspace to enable
protection over the content it is displaying. This will typically be implemented
by the driver using HDCP.

The property is a tri-state with the following values:
- OFF: Self explanatory, no content protection
- DESIRED: Userspace requests that the driver enable protection
- ENABLED: Once the driver has authenticated the link, it sets this value

The driver is responsible for downgrading ENABLED to DESIRED if the link becomes
unprotected. The driver should also maintain the desiredness of protection
across hotplug/dpms/suspend.

If this looks familiar, I posted [1] this 3 years ago. We have been using this
in ChromeOS across exynos, mediatek, and rockchip over that time.

Changes in v2:
 - Pimp kerneldoc for content_protection_property (Daniel)
 - Drop sysfs attribute
Changes in v3:
 - None
Changes in v4:
- Changed kerneldoc to recommend userspace polling (Daniel)
- Changed kerneldoc to briefly describe how to attach the property (Daniel)
Changes in v5:
- checkpatch whitespace noise
- Change DRM_MODE_CONTENT_PROTECTION_OFF to DRM_MODE_CONTENT_PROTECTION_UNDESIRED
Changes in v6:
- None

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sean Paul <seanpaul@chromium.org>

[1] https://lists.freedesktop.org/archives/dri-devel/2014-December/073336.html
Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-4-seanpaul@chromium.org
---
 include/uapi/drm/drm_mode.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 5597a87154e5..d1a69ff24fe8 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -173,6 +173,10 @@ extern "C" {
 		DRM_MODE_REFLECT_X | \
 		DRM_MODE_REFLECT_Y)
 
+/* Content Protection Flags */
+#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED	0
+#define DRM_MODE_CONTENT_PROTECTION_DESIRED     1
+#define DRM_MODE_CONTENT_PROTECTION_ENABLED     2
 
 struct drm_mode_modeinfo {
 	__u32 clock;
-- 
cgit v1.2.3


From c6ed6dad5cfb76d72d8f9accba9b9f8d572c518c Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 15 Nov 2017 17:49:13 +0200
Subject: drm/uapi: Validate the mode flags/type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently userspace is allowed to feed in any king of garbage in the
high bits of the mode flags/type, as are drivers when probing modes.
Reject any mode with bogus flags/type.

Hopefully this won't break any current userspace...

v2: Split the type and flags checks to separates ifs (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jose Abreu <Jose.Abreu@synopsys.com>
Cc: Adam Jackson <ajax@redhat.com>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171115154913.23827-1-ville.syrjala@linux.intel.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_modes.c |  6 ++++++
 include/uapi/drm/drm_mode.h | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 1a72883b836e..34b5123ebfc0 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1036,6 +1036,12 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo);
 enum drm_mode_status
 drm_mode_validate_basic(const struct drm_display_mode *mode)
 {
+	if (mode->type & ~DRM_MODE_TYPE_ALL)
+		return MODE_BAD;
+
+	if (mode->flags & ~DRM_MODE_FLAG_ALL)
+		return MODE_BAD;
+
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) > DRM_MODE_FLAG_3D_MAX)
 		return MODE_BAD;
 
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 5597a87154e5..004db470b477 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -46,6 +46,14 @@ extern "C" {
 #define DRM_MODE_TYPE_USERDEF	(1<<5)
 #define DRM_MODE_TYPE_DRIVER	(1<<6)
 
+#define DRM_MODE_TYPE_ALL	(DRM_MODE_TYPE_BUILTIN |	\
+				 DRM_MODE_TYPE_CLOCK_C |	\
+				 DRM_MODE_TYPE_CRTC_C |		\
+				 DRM_MODE_TYPE_PREFERRED |	\
+				 DRM_MODE_TYPE_DEFAULT |	\
+				 DRM_MODE_TYPE_USERDEF |	\
+				 DRM_MODE_TYPE_DRIVER)
+
 /* Video mode flags */
 /* bit compatible with the xrandr RR_ definitions (bits 0-13)
  *
@@ -99,6 +107,22 @@ extern "C" {
 #define  DRM_MODE_FLAG_PIC_AR_16_9 \
 			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
 
+#define  DRM_MODE_FLAG_ALL	(DRM_MODE_FLAG_PHSYNC |		\
+				 DRM_MODE_FLAG_NHSYNC |		\
+				 DRM_MODE_FLAG_PVSYNC |		\
+				 DRM_MODE_FLAG_NVSYNC |		\
+				 DRM_MODE_FLAG_INTERLACE |	\
+				 DRM_MODE_FLAG_DBLSCAN |	\
+				 DRM_MODE_FLAG_CSYNC |		\
+				 DRM_MODE_FLAG_PCSYNC |		\
+				 DRM_MODE_FLAG_NCSYNC |		\
+				 DRM_MODE_FLAG_HSKEW |		\
+				 DRM_MODE_FLAG_BCAST |		\
+				 DRM_MODE_FLAG_PIXMUX |		\
+				 DRM_MODE_FLAG_DBLCLK |		\
+				 DRM_MODE_FLAG_CLKDIV2 |	\
+				 DRM_MODE_FLAG_3D_MASK)
+
 /* DPMS flags */
 /* bit compatible with the xorg definitions. */
 #define DRM_MODE_DPMS_ON	0
-- 
cgit v1.2.3


From d15f40c84cdd2c4ac717d3c1e0146c07b5996d18 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 14 Nov 2017 20:32:51 +0200
Subject: drm/uapi: Deprecate DRM_MODE_FLAG_PIXMUX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reject any mode with DRM_MODE_FLAG_PIXMUX. We have no code that even
checks for this flag hence it can't possibly do any good.

Looks like this flag had something to do the the controller<->ramdac
interface with some ancient S3 graphics adapters. Why someone though
it would be a good idea to expose it directly to users I don't know.
And later on it got copied into the randr protocol and kms uapi.

Cc: Jose Abreu <Jose.Abreu@synopsys.com>
Cc: Adam Jackson <ajax@redhat.com>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171114183258.16976-4-ville.syrjala@linux.intel.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jose Abreu <joabreu@synopsys.com>
---
 include/drm/drm_modes.h     | 2 +-
 include/uapi/drm/drm_mode.h | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 9f3421c8efcd..bce573375dd8 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -300,7 +300,7 @@ struct drm_display_mode {
 	 *  - DRM_MODE_FLAG_NCSYNC: composite sync is active low.
 	 *  - DRM_MODE_FLAG_HSKEW: hskew provided (not used?).
 	 *  - DRM_MODE_FLAG_BCAST: not used?
-	 *  - DRM_MODE_FLAG_PIXMUX: not used?
+	 *  - DRM_MODE_FLAG_PIXMUX: <deprecated>
 	 *  - DRM_MODE_FLAG_DBLCLK: double-clocked mode.
 	 *  - DRM_MODE_FLAG_CLKDIV2: half-clocked mode.
 	 *
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 004db470b477..8d872e17223e 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -75,7 +75,7 @@ extern "C" {
 #define DRM_MODE_FLAG_NCSYNC			(1<<8)
 #define DRM_MODE_FLAG_HSKEW			(1<<9) /* hskew provided */
 #define DRM_MODE_FLAG_BCAST			(1<<10)
-#define DRM_MODE_FLAG_PIXMUX			(1<<11)
+#define DRM_MODE_FLAG_PIXMUX			(1<<11) /* deprecated */
 #define DRM_MODE_FLAG_DBLCLK			(1<<12)
 #define DRM_MODE_FLAG_CLKDIV2			(1<<13)
  /*
@@ -118,7 +118,6 @@ extern "C" {
 				 DRM_MODE_FLAG_NCSYNC |		\
 				 DRM_MODE_FLAG_HSKEW |		\
 				 DRM_MODE_FLAG_BCAST |		\
-				 DRM_MODE_FLAG_PIXMUX |		\
 				 DRM_MODE_FLAG_DBLCLK |		\
 				 DRM_MODE_FLAG_CLKDIV2 |	\
 				 DRM_MODE_FLAG_3D_MASK)
-- 
cgit v1.2.3


From 05ebac0980543cf93dbd4a29e6b03c1c9b883c70 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 14 Nov 2017 20:32:52 +0200
Subject: drm/uapi: Deprecate DRM_MODE_FLAG_BCAST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reject any mode with DRM_MODE_FLAG_BCAST. We have no code that even
checks for this flag hence it can't possibly do any good.

I think this maybe originated from fbdev where it was supposed to
indicate PAL/NTSC broadcast timings. I have no idea why those would
have to be identified by a flag rather than by just the timings
themselves. And then I assume it got copied into xfree86 for
fbdevhw, and later on it leaked into the randr protocol and kms uapi.

Since kms fbdev emulation never uses the corresponding fbdev flag
there should be no sane way for this to come back into kms via
userspace either.

Cc: Jose Abreu <Jose.Abreu@synopsys.com>
Cc: Adam Jackson <ajax@redhat.com>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171114183258.16976-5-ville.syrjala@linux.intel.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jose Abreu <joabreu@synopsys.com>
---
 include/drm/drm_modes.h     | 2 +-
 include/uapi/drm/drm_mode.h | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index bce573375dd8..09773e766e1f 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -299,7 +299,7 @@ struct drm_display_mode {
 	 *  - DRM_MODE_FLAG_PCSYNC: composite sync is active high.
 	 *  - DRM_MODE_FLAG_NCSYNC: composite sync is active low.
 	 *  - DRM_MODE_FLAG_HSKEW: hskew provided (not used?).
-	 *  - DRM_MODE_FLAG_BCAST: not used?
+	 *  - DRM_MODE_FLAG_BCAST: <deprecated>
 	 *  - DRM_MODE_FLAG_PIXMUX: <deprecated>
 	 *  - DRM_MODE_FLAG_DBLCLK: double-clocked mode.
 	 *  - DRM_MODE_FLAG_CLKDIV2: half-clocked mode.
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 8d872e17223e..a7cded1c43e8 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -74,7 +74,7 @@ extern "C" {
 #define DRM_MODE_FLAG_PCSYNC			(1<<7)
 #define DRM_MODE_FLAG_NCSYNC			(1<<8)
 #define DRM_MODE_FLAG_HSKEW			(1<<9) /* hskew provided */
-#define DRM_MODE_FLAG_BCAST			(1<<10)
+#define DRM_MODE_FLAG_BCAST			(1<<10) /* deprecated */
 #define DRM_MODE_FLAG_PIXMUX			(1<<11) /* deprecated */
 #define DRM_MODE_FLAG_DBLCLK			(1<<12)
 #define DRM_MODE_FLAG_CLKDIV2			(1<<13)
@@ -117,7 +117,6 @@ extern "C" {
 				 DRM_MODE_FLAG_PCSYNC |		\
 				 DRM_MODE_FLAG_NCSYNC |		\
 				 DRM_MODE_FLAG_HSKEW |		\
-				 DRM_MODE_FLAG_BCAST |		\
 				 DRM_MODE_FLAG_DBLCLK |		\
 				 DRM_MODE_FLAG_CLKDIV2 |	\
 				 DRM_MODE_FLAG_3D_MASK)
-- 
cgit v1.2.3


From b7245cc536b95167d67b53e27adff964400045f1 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 15 Nov 2017 17:45:04 +0200
Subject: drm/uapi: Deprecate nonsense kms mode types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BUILTIN, CRTC_C, CLOCK_C, and DEFULT mode types are unused. Let's
refuse to generate them or accept them from userspace either. A
cursory check didn't reveal any userspace code that would depend
on these.

v2: Recommend DRIVER instead of BUILTIN (ajax)

Cc: Jose Abreu <Jose.Abreu@synopsys.com>
Cc: Adam Jackson <ajax@redhat.com>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171115154504.14338-1-ville.syrjala@linux.intel.com
Reviewed-by: Jose Abreu <joabreu@synopsys.com>
---
 include/drm/drm_modes.h     |  7 ++++---
 include/uapi/drm/drm_mode.h | 14 +++++---------
 2 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 99dd815269e9..71cbb10e22dc 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -242,8 +242,6 @@ struct drm_display_mode {
 	 * A bitmask of flags, mostly about the source of a mode. Possible flags
 	 * are:
 	 *
-	 *  - DRM_MODE_TYPE_BUILTIN: Meant for hard-coded modes, effectively
-	 *    unused.
 	 *  - DRM_MODE_TYPE_PREFERRED: Preferred mode, usually the native
 	 *    resolution of an LCD panel. There should only be one preferred
 	 *    mode per connector at any given time.
@@ -253,8 +251,11 @@ struct drm_display_mode {
 	 *  - DRM_MODE_TYPE_USERDEF: Mode defined via kernel command line
 	 *
 	 * Plus a big list of flags which shouldn't be used at all, but are
-	 * still around since these flags are also used in the userspace ABI:
+	 * still around since these flags are also used in the userspace ABI.
+	 * We no longer accept modes with these types though:
 	 *
+	 *  - DRM_MODE_TYPE_BUILTIN: Meant for hard-coded modes, unused.
+	 *    Use DRM_MODE_TYPE_DRIVER instead.
 	 *  - DRM_MODE_TYPE_DEFAULT: Again a leftover, use
 	 *    DRM_MODE_TYPE_PREFERRED instead.
 	 *  - DRM_MODE_TYPE_CLOCK_C and DRM_MODE_TYPE_CRTC_C: Define leftovers
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index a7cded1c43e8..eb9b68c7c218 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -38,19 +38,15 @@ extern "C" {
 #define DRM_DISPLAY_MODE_LEN	32
 #define DRM_PROP_NAME_LEN	32
 
-#define DRM_MODE_TYPE_BUILTIN	(1<<0)
-#define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN)
-#define DRM_MODE_TYPE_CRTC_C	((1<<2) | DRM_MODE_TYPE_BUILTIN)
+#define DRM_MODE_TYPE_BUILTIN	(1<<0) /* deprecated */
+#define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */
+#define DRM_MODE_TYPE_CRTC_C	((1<<2) | DRM_MODE_TYPE_BUILTIN) /* deprecated */
 #define DRM_MODE_TYPE_PREFERRED	(1<<3)
-#define DRM_MODE_TYPE_DEFAULT	(1<<4)
+#define DRM_MODE_TYPE_DEFAULT	(1<<4) /* deprecated */
 #define DRM_MODE_TYPE_USERDEF	(1<<5)
 #define DRM_MODE_TYPE_DRIVER	(1<<6)
 
-#define DRM_MODE_TYPE_ALL	(DRM_MODE_TYPE_BUILTIN |	\
-				 DRM_MODE_TYPE_CLOCK_C |	\
-				 DRM_MODE_TYPE_CRTC_C |		\
-				 DRM_MODE_TYPE_PREFERRED |	\
-				 DRM_MODE_TYPE_DEFAULT |	\
+#define DRM_MODE_TYPE_ALL	(DRM_MODE_TYPE_PREFERRED |	\
 				 DRM_MODE_TYPE_USERDEF |	\
 				 DRM_MODE_TYPE_DRIVER)
 
-- 
cgit v1.2.3


From 6ec5bd348934887494541bcc9b53d621b1f2962c Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Fri, 2 Feb 2018 22:42:31 +0200
Subject: drm/i915: Deprecate I915_SET_COLORKEY_NONE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Deprecate the silly I915_SET_COLORKEY_NONE flag. The obvious
way to disable colorkey is to just set flags to 0, which is
exactly what the intel ddx has been doing all along.

Currently when userspace sets the flags to 0, we end up in a
funny state where colorkey is disabled, but various colorkey
vs. scaling checks still consider colorkey to be enabled, and
thus we don't allow plane scaling to kick in.

In case there is some other userspace out there that actually
uses this flag (unlikely as this is an i915 specific uapi)
we'll keep on accepting it.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180202204231.27905-1-ville.syrjala@linux.intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_atomic_plane.c | 1 -
 drivers/gpu/drm/i915/intel_display.c      | 4 ++--
 drivers/gpu/drm/i915/intel_sprite.c       | 5 ++++-
 include/uapi/drm/i915_drm.h               | 4 +++-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 8e6dc159f64d..57ee8b786cd8 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -56,7 +56,6 @@ intel_create_plane_state(struct drm_plane *plane)
 
 	state->base.plane = plane;
 	state->base.rotation = DRM_MODE_ROTATE_0;
-	state->ckey.flags = I915_SET_COLORKEY_NONE;
 
 	return state;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e79b9242eb66..e9bba2ab4904 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4787,7 +4787,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
 		return ret;
 
 	/* check colorkey */
-	if (plane_state->ckey.flags != I915_SET_COLORKEY_NONE) {
+	if (plane_state->ckey.flags) {
 		DRM_DEBUG_KMS("[PLANE:%d:%s] scaling with color key not allowed",
 			      intel_plane->base.base.id,
 			      intel_plane->base.name);
@@ -12788,7 +12788,7 @@ intel_check_primary_plane(struct intel_plane *plane,
 
 	if (INTEL_GEN(dev_priv) >= 9) {
 		/* use scaler when colorkey is not required */
-		if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+		if (!state->ckey.flags) {
 			min_scale = 1;
 			max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
 		}
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 630d20eecf3c..32f10621fac8 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -894,7 +894,7 @@ intel_check_sprite_plane(struct intel_plane *plane,
 	/* setup can_scale, min_scale, max_scale */
 	if (INTEL_GEN(dev_priv) >= 9) {
 		/* use scaler when colorkey is not required */
-		if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
+		if (!state->ckey.flags) {
 			can_scale = 1;
 			min_scale = 1;
 			max_scale = skl_max_scale(crtc, crtc_state);
@@ -1070,6 +1070,9 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
 	struct drm_modeset_acquire_ctx ctx;
 	int ret = 0;
 
+	/* ignore the pointless "none" flag */
+	set->flags &= ~I915_SET_COLORKEY_NONE;
+
 	/* Make sure we don't try to enable both src & dest simultaneously */
 	if ((set->flags & (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) == (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE))
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 536ee4febd74..29fa48e4755d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1358,7 +1358,9 @@ struct drm_intel_overlay_attrs {
  * active on a given plane.
  */
 
-#define I915_SET_COLORKEY_NONE		(1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_NONE		(1<<0) /* Deprecated. Instead set
+						* flags==0 to disable colorkeying.
+						*/
 #define I915_SET_COLORKEY_DESTINATION	(1<<1)
 #define I915_SET_COLORKEY_SOURCE	(1<<2)
 struct drm_intel_sprite_colorkey {
-- 
cgit v1.2.3


From a11024457d348672b26b3d4581ed19c793399b48 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 6 Feb 2018 20:32:46 -0500
Subject: uapi: Fix type used in ioctl parameter structures

Use __u32 and __u64 instead of POSIX types that may not be defined
in user mode builds.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f4cab5b3ba9a..111d73ba2d96 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -263,10 +263,10 @@ struct kfd_ioctl_get_tile_config_args {
 };
 
 struct kfd_ioctl_set_trap_handler_args {
-	uint64_t tba_addr;		/* to KFD */
-	uint64_t tma_addr;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u64 tba_addr;		/* to KFD */
+	__u64 tma_addr;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define AMDKFD_IOCTL_BASE 'K'
-- 
cgit v1.2.3


From 65101d8c9108201118efa7e08f4e2c57f438deb9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 12 Jan 2018 10:09:26 +0100
Subject: drm/vc4: Expose performance counters to userspace

The V3D engine has various hardware counters which might be interesting
to userspace performance analysis tools.

Expose new ioctls to create/destroy a performance monitor object and
query the counter values of this perfmance monitor.

Note that a perfomance monitor is given an ID that is only valid on the
file descriptor it has been allocated from. A performance monitor can be
attached to a CL submission and the driver will enable HW counters for
this request and update the performance monitor values at the end of the
job.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20180112090926.12538-1-boris.brezillon@free-electrons.com
---
 drivers/gpu/drm/vc4/Makefile      |   1 +
 drivers/gpu/drm/vc4/vc4_drv.c     |  26 ++++++
 drivers/gpu/drm/vc4/vc4_drv.h     |  68 ++++++++++++++
 drivers/gpu/drm/vc4/vc4_gem.c     |  48 +++++++++-
 drivers/gpu/drm/vc4/vc4_irq.c     |  40 +++++++-
 drivers/gpu/drm/vc4/vc4_perfmon.c | 188 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/vc4/vc4_regs.h    |  35 +------
 drivers/gpu/drm/vc4/vc4_v3d.c     |  64 ++++++-------
 include/uapi/drm/vc4_drm.h        |  76 +++++++++++++++
 9 files changed, 474 insertions(+), 72 deletions(-)
 create mode 100644 drivers/gpu/drm/vc4/vc4_perfmon.c

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index f5500df51686..4a3a868235f8 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -15,6 +15,7 @@ vc4-y := \
 	vc4_vec.o \
 	vc4_hvs.o \
 	vc4_irq.o \
+	vc4_perfmon.o \
 	vc4_plane.o \
 	vc4_render_cl.o \
 	vc4_trace_points.o \
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index ceb385fd69c5..94b99c90425a 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -101,6 +101,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
 	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
 	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+	case DRM_VC4_PARAM_SUPPORTS_PERFMON:
 		args->value = true;
 		break;
 	default:
@@ -111,6 +112,26 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int vc4_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct vc4_file *vc4file;
+
+	vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
+	if (!vc4file)
+		return -ENOMEM;
+
+	vc4_perfmon_open_file(vc4file);
+	file->driver_priv = vc4file;
+	return 0;
+}
+
+static void vc4_close(struct drm_device *dev, struct drm_file *file)
+{
+	struct vc4_file *vc4file = file->driver_priv;
+
+	vc4_perfmon_close_file(vc4file);
+}
+
 static const struct vm_operations_struct vc4_vm_ops = {
 	.fault = vc4_fault,
 	.open = drm_gem_vm_open,
@@ -143,6 +164,9 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -153,6 +177,8 @@ static struct drm_driver vc4_drm_driver = {
 			    DRIVER_RENDER |
 			    DRIVER_PRIME),
 	.lastclose = drm_fb_helper_lastclose,
+	.open = vc4_open,
+	.postclose = vc4_close,
 	.irq_handler = vc4_irq,
 	.irq_preinstall = vc4_irq_preinstall,
 	.irq_postinstall = vc4_irq_postinstall,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 3af22936d9b3..fefa1664a9f5 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -11,6 +11,8 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_cma_helper.h>
 
+#include "uapi/drm/vc4_drm.h"
+
 /* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
  * this.
  */
@@ -29,6 +31,36 @@ enum vc4_kernel_bo_type {
 	VC4_BO_TYPE_COUNT
 };
 
+/* Performance monitor object. The perform lifetime is controlled by userspace
+ * using perfmon related ioctls. A perfmon can be attached to a submit_cl
+ * request, and when this is the case, HW perf counters will be activated just
+ * before the submit_cl is submitted to the GPU and disabled when the job is
+ * done. This way, only events related to a specific job will be counted.
+ */
+struct vc4_perfmon {
+	/* Tracks the number of users of the perfmon, when this counter reaches
+	 * zero the perfmon is destroyed.
+	 */
+	refcount_t refcnt;
+
+	/* Number of counters activated in this perfmon instance
+	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
+	 */
+	u8 ncounters;
+
+	/* Events counted by the HW perf counters. */
+	u8 events[DRM_VC4_MAX_PERF_COUNTERS];
+
+	/* Storage for counter values. Counters are incremented by the HW
+	 * perf counter values every time the perfmon is attached to a GPU job.
+	 * This way, perfmon users don't have to retrieve the results after
+	 * each job if they want to track events covering several submissions.
+	 * Note that counter values can't be reset, but you can fake a reset by
+	 * destroying the perfmon and creating a new one.
+	 */
+	u64 counters[0];
+};
+
 struct vc4_dev {
 	struct drm_device *dev;
 
@@ -121,6 +153,11 @@ struct vc4_dev {
 	wait_queue_head_t job_wait_queue;
 	struct work_struct job_done_work;
 
+	/* Used to track the active perfmon if any. Access to this field is
+	 * protected by job_lock.
+	 */
+	struct vc4_perfmon *active_perfmon;
+
 	/* List of struct vc4_seqno_cb for callbacks to be made from a
 	 * workqueue when the given seqno is passed.
 	 */
@@ -406,6 +443,21 @@ struct vc4_exec_info {
 	void *uniforms_v;
 	uint32_t uniforms_p;
 	uint32_t uniforms_size;
+
+	/* Pointer to a performance monitor object if the user requested it,
+	 * NULL otherwise.
+	 */
+	struct vc4_perfmon *perfmon;
+};
+
+/* Per-open file private data. Any driver-specific resource that has to be
+ * released when the DRM file is closed should be placed here.
+ */
+struct vc4_file {
+	struct {
+		struct idr idr;
+		struct mutex lock;
+	} perfmon;
 };
 
 static inline struct vc4_exec_info *
@@ -646,3 +698,19 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec,
 /* vc4_validate_shader.c */
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+
+/* vc4_perfmon.c */
+void vc4_perfmon_get(struct vc4_perfmon *perfmon);
+void vc4_perfmon_put(struct vc4_perfmon *perfmon);
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+		      bool capture);
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
+void vc4_perfmon_open_file(struct vc4_file *vc4file);
+void vc4_perfmon_close_file(struct vc4_file *vc4file);
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 638540943c61..3ac801b14d4e 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -454,14 +454,30 @@ again:
 
 	vc4_flush_caches(dev);
 
+	/* Only start the perfmon if it was not already started by a previous
+	 * job.
+	 */
+	if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
+		vc4_perfmon_start(vc4, exec->perfmon);
+
 	/* Either put the job in the binner if it uses the binner, or
 	 * immediately move it to the to-be-rendered queue.
 	 */
 	if (exec->ct0ca != exec->ct0ea) {
 		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
 	} else {
+		struct vc4_exec_info *next;
+
 		vc4_move_job_to_render(dev, exec);
-		goto again;
+		next = vc4_first_bin_job(vc4);
+
+		/* We can't start the next bin job if the previous job had a
+		 * different perfmon instance attached to it. The same goes
+		 * if one of them had a perfmon attached to it and the other
+		 * one doesn't.
+		 */
+		if (next && next->perfmon == exec->perfmon)
+			goto again;
 	}
 }
 
@@ -621,6 +637,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 		 struct ww_acquire_ctx *acquire_ctx)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *renderjob;
 	uint64_t seqno;
 	unsigned long irqflags;
 	struct vc4_fence *fence;
@@ -646,11 +663,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 
 	list_add_tail(&exec->head, &vc4->bin_job_list);
 
-	/* If no job was executing, kick ours off.  Otherwise, it'll
-	 * get started when the previous job's flush done interrupt
-	 * occurs.
+	/* If no bin job was executing and if the render job (if any) has the
+	 * same perfmon as our job attached to it (or if both jobs don't have
+	 * perfmon activated), then kick ours off.  Otherwise, it'll get
+	 * started when the previous job's flush/render done interrupt occurs.
 	 */
-	if (vc4_first_bin_job(vc4) == exec) {
+	renderjob = vc4_first_render_job(vc4);
+	if (vc4_first_bin_job(vc4) == exec &&
+	    (!renderjob || renderjob->perfmon == exec->perfmon)) {
 		vc4_submit_next_bin_job(dev);
 		vc4_queue_hangcheck(dev);
 	}
@@ -915,6 +935,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	vc4->bin_alloc_used &= ~exec->bin_slots;
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 
+	/* Release the reference we had on the perf monitor. */
+	vc4_perfmon_put(exec->perfmon);
+
 	mutex_lock(&vc4->power_lock);
 	if (--vc4->power_refcount == 0) {
 		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
@@ -1067,6 +1090,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
 	struct ww_acquire_ctx acquire_ctx;
@@ -1080,6 +1104,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (args->pad2 != 0) {
+		DRM_DEBUG("->pad2 must be set to zero\n");
+		return -EINVAL;
+	}
+
 	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 	if (!exec) {
 		DRM_ERROR("malloc failure on exec struct\n");
@@ -1105,6 +1134,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	if (args->perfmonid) {
+		exec->perfmon = vc4_perfmon_find(vc4file,
+						 args->perfmonid);
+		if (!exec->perfmon) {
+			ret = -ENOENT;
+			goto fail;
+		}
+	}
+
 	if (exec->args->bin_cl_size != 0) {
 		ret = vc4_get_bcl(dev, exec);
 		if (ret)
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 3dd62d75f531..4cd2ccfe15f4 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -104,13 +104,20 @@ static void
 vc4_irq_finish_bin_job(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
+	struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4);
 
 	if (!exec)
 		return;
 
 	vc4_move_job_to_render(dev, exec);
-	vc4_submit_next_bin_job(dev);
+	next = vc4_first_bin_job(vc4);
+
+	/* Only submit the next job in the bin list if it matches the perfmon
+	 * attached to the one that just finished (or if both jobs don't have
+	 * perfmon attached to them).
+	 */
+	if (next && next->perfmon == exec->perfmon)
+		vc4_submit_next_bin_job(dev);
 }
 
 static void
@@ -122,6 +129,10 @@ vc4_cancel_bin_job(struct drm_device *dev)
 	if (!exec)
 		return;
 
+	/* Stop the perfmon so that the next bin job can be started. */
+	if (exec->perfmon)
+		vc4_perfmon_stop(vc4, exec->perfmon, false);
+
 	list_move_tail(&exec->head, &vc4->bin_job_list);
 	vc4_submit_next_bin_job(dev);
 }
@@ -131,18 +142,41 @@ vc4_irq_finish_render_job(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
+	struct vc4_exec_info *nextbin, *nextrender;
 
 	if (!exec)
 		return;
 
 	vc4->finished_seqno++;
 	list_move_tail(&exec->head, &vc4->job_done_list);
+
+	nextbin = vc4_first_bin_job(vc4);
+	nextrender = vc4_first_render_job(vc4);
+
+	/* Only stop the perfmon if following jobs in the queue don't expect it
+	 * to be enabled.
+	 */
+	if (exec->perfmon && !nextrender &&
+	    (!nextbin || nextbin->perfmon != exec->perfmon))
+		vc4_perfmon_stop(vc4, exec->perfmon, true);
+
+	/* If there's a render job waiting, start it. If this is not the case
+	 * we may have to unblock the binner if it's been stalled because of
+	 * perfmon (this can be checked by comparing the perfmon attached to
+	 * the finished renderjob to the one attached to the next bin job: if
+	 * they don't match, this means the binner is stalled and should be
+	 * restarted).
+	 */
+	if (nextrender)
+		vc4_submit_next_render_job(dev);
+	else if (nextbin && nextbin->perfmon != exec->perfmon)
+		vc4_submit_next_bin_job(dev);
+
 	if (exec->fence) {
 		dma_fence_signal_locked(exec->fence);
 		dma_fence_put(exec->fence);
 		exec->fence = NULL;
 	}
-	vc4_submit_next_render_job(dev);
 
 	wake_up_all(&vc4->job_wait_queue);
 	schedule_work(&vc4->job_done_work);
diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c
new file mode 100644
index 000000000000..437e7a27f21d
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Broadcom
+ */
+
+/**
+ * DOC: VC4 V3D performance monitor module
+ *
+ * The V3D block provides 16 hardware counters which can count various events.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define VC4_PERFMONID_MIN	1
+#define VC4_PERFMONID_MAX	U32_MAX
+
+void vc4_perfmon_get(struct vc4_perfmon *perfmon)
+{
+	if (perfmon)
+		refcount_inc(&perfmon->refcnt);
+}
+
+void vc4_perfmon_put(struct vc4_perfmon *perfmon)
+{
+	if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+		kfree(perfmon);
+}
+
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
+{
+	unsigned int i;
+	u32 mask;
+
+	if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
+		return;
+
+	for (i = 0; i < perfmon->ncounters; i++)
+		V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
+
+	mask = GENMASK(perfmon->ncounters - 1, 0);
+	V3D_WRITE(V3D_PCTRC, mask);
+	V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
+	vc4->active_perfmon = perfmon;
+}
+
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+		      bool capture)
+{
+	unsigned int i;
+
+	if (WARN_ON_ONCE(!vc4->active_perfmon ||
+			 perfmon != vc4->active_perfmon))
+		return;
+
+	if (capture) {
+		for (i = 0; i < perfmon->ncounters; i++)
+			perfmon->counters[i] += V3D_READ(V3D_PCTR(i));
+	}
+
+	V3D_WRITE(V3D_PCTRE, 0);
+	vc4->active_perfmon = NULL;
+}
+
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
+{
+	struct vc4_perfmon *perfmon;
+
+	mutex_lock(&vc4file->perfmon.lock);
+	perfmon = idr_find(&vc4file->perfmon.idr, id);
+	vc4_perfmon_get(perfmon);
+	mutex_unlock(&vc4file->perfmon.lock);
+
+	return perfmon;
+}
+
+void vc4_perfmon_open_file(struct vc4_file *vc4file)
+{
+	mutex_init(&vc4file->perfmon.lock);
+	idr_init(&vc4file->perfmon.idr);
+}
+
+static int vc4_perfmon_idr_del(int id, void *elem, void *data)
+{
+	struct vc4_perfmon *perfmon = elem;
+
+	vc4_perfmon_put(perfmon);
+
+	return 0;
+}
+
+void vc4_perfmon_close_file(struct vc4_file *vc4file)
+{
+	mutex_lock(&vc4file->perfmon.lock);
+	idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
+	idr_destroy(&vc4file->perfmon.idr);
+	mutex_unlock(&vc4file->perfmon.lock);
+}
+
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct drm_vc4_perfmon_create *req = data;
+	struct vc4_perfmon *perfmon;
+	unsigned int i;
+	int ret;
+
+	/* Number of monitored counters cannot exceed HW limits. */
+	if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
+	    !req->ncounters)
+		return -EINVAL;
+
+	/* Make sure all events are valid. */
+	for (i = 0; i < req->ncounters; i++) {
+		if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
+			return -EINVAL;
+	}
+
+	perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)),
+			  GFP_KERNEL);
+	if (!perfmon)
+		return -ENOMEM;
+
+	for (i = 0; i < req->ncounters; i++)
+		perfmon->events[i] = req->events[i];
+
+	perfmon->ncounters = req->ncounters;
+
+	refcount_set(&perfmon->refcnt, 1);
+
+	mutex_lock(&vc4file->perfmon.lock);
+	ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
+			VC4_PERFMONID_MAX, GFP_KERNEL);
+	mutex_unlock(&vc4file->perfmon.lock);
+
+	if (ret < 0) {
+		kfree(perfmon);
+		return ret;
+	}
+
+	req->id = ret;
+	return 0;
+}
+
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct drm_vc4_perfmon_destroy *req = data;
+	struct vc4_perfmon *perfmon;
+
+	mutex_lock(&vc4file->perfmon.lock);
+	perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
+	mutex_unlock(&vc4file->perfmon.lock);
+
+	if (!perfmon)
+		return -EINVAL;
+
+	vc4_perfmon_put(perfmon);
+	return 0;
+}
+
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct drm_vc4_perfmon_get_values *req = data;
+	struct vc4_perfmon *perfmon;
+	int ret;
+
+	mutex_lock(&vc4file->perfmon.lock);
+	perfmon = idr_find(&vc4file->perfmon.idr, req->id);
+	vc4_perfmon_get(perfmon);
+	mutex_unlock(&vc4file->perfmon.lock);
+
+	if (!perfmon)
+		return -EINVAL;
+
+	if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
+			 perfmon->ncounters * sizeof(u64)))
+		ret = -EFAULT;
+	else
+		ret = 0;
+
+	vc4_perfmon_put(perfmon);
+	return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 55677bd50f66..b9749cb24063 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -122,38 +122,9 @@
 #define V3D_VPMBASE  0x00504
 #define V3D_PCTRC    0x00670
 #define V3D_PCTRE    0x00674
-#define V3D_PCTR0    0x00680
-#define V3D_PCTRS0   0x00684
-#define V3D_PCTR1    0x00688
-#define V3D_PCTRS1   0x0068c
-#define V3D_PCTR2    0x00690
-#define V3D_PCTRS2   0x00694
-#define V3D_PCTR3    0x00698
-#define V3D_PCTRS3   0x0069c
-#define V3D_PCTR4    0x006a0
-#define V3D_PCTRS4   0x006a4
-#define V3D_PCTR5    0x006a8
-#define V3D_PCTRS5   0x006ac
-#define V3D_PCTR6    0x006b0
-#define V3D_PCTRS6   0x006b4
-#define V3D_PCTR7    0x006b8
-#define V3D_PCTRS7   0x006bc
-#define V3D_PCTR8    0x006c0
-#define V3D_PCTRS8   0x006c4
-#define V3D_PCTR9    0x006c8
-#define V3D_PCTRS9   0x006cc
-#define V3D_PCTR10   0x006d0
-#define V3D_PCTRS10  0x006d4
-#define V3D_PCTR11   0x006d8
-#define V3D_PCTRS11  0x006dc
-#define V3D_PCTR12   0x006e0
-#define V3D_PCTRS12  0x006e4
-#define V3D_PCTR13   0x006e8
-#define V3D_PCTRS13  0x006ec
-#define V3D_PCTR14   0x006f0
-#define V3D_PCTRS14  0x006f4
-#define V3D_PCTR15   0x006f8
-#define V3D_PCTRS15  0x006fc
+# define V3D_PCTRE_EN	BIT(31)
+#define V3D_PCTR(x)  (0x00680 + ((x) * 8))
+#define V3D_PCTRS(x) (0x00684 + ((x) * 8))
 #define V3D_DBGE     0x00f00
 #define V3D_FDBGO    0x00f04
 #define V3D_FDBGB    0x00f08
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 493f392b3a0a..bfc2fa73d2ae 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -68,38 +68,38 @@ static const struct {
 	REGDEF(V3D_VPMBASE),
 	REGDEF(V3D_PCTRC),
 	REGDEF(V3D_PCTRE),
-	REGDEF(V3D_PCTR0),
-	REGDEF(V3D_PCTRS0),
-	REGDEF(V3D_PCTR1),
-	REGDEF(V3D_PCTRS1),
-	REGDEF(V3D_PCTR2),
-	REGDEF(V3D_PCTRS2),
-	REGDEF(V3D_PCTR3),
-	REGDEF(V3D_PCTRS3),
-	REGDEF(V3D_PCTR4),
-	REGDEF(V3D_PCTRS4),
-	REGDEF(V3D_PCTR5),
-	REGDEF(V3D_PCTRS5),
-	REGDEF(V3D_PCTR6),
-	REGDEF(V3D_PCTRS6),
-	REGDEF(V3D_PCTR7),
-	REGDEF(V3D_PCTRS7),
-	REGDEF(V3D_PCTR8),
-	REGDEF(V3D_PCTRS8),
-	REGDEF(V3D_PCTR9),
-	REGDEF(V3D_PCTRS9),
-	REGDEF(V3D_PCTR10),
-	REGDEF(V3D_PCTRS10),
-	REGDEF(V3D_PCTR11),
-	REGDEF(V3D_PCTRS11),
-	REGDEF(V3D_PCTR12),
-	REGDEF(V3D_PCTRS12),
-	REGDEF(V3D_PCTR13),
-	REGDEF(V3D_PCTRS13),
-	REGDEF(V3D_PCTR14),
-	REGDEF(V3D_PCTRS14),
-	REGDEF(V3D_PCTR15),
-	REGDEF(V3D_PCTRS15),
+	REGDEF(V3D_PCTR(0)),
+	REGDEF(V3D_PCTRS(0)),
+	REGDEF(V3D_PCTR(1)),
+	REGDEF(V3D_PCTRS(1)),
+	REGDEF(V3D_PCTR(2)),
+	REGDEF(V3D_PCTRS(2)),
+	REGDEF(V3D_PCTR(3)),
+	REGDEF(V3D_PCTRS(3)),
+	REGDEF(V3D_PCTR(4)),
+	REGDEF(V3D_PCTRS(4)),
+	REGDEF(V3D_PCTR(5)),
+	REGDEF(V3D_PCTRS(5)),
+	REGDEF(V3D_PCTR(6)),
+	REGDEF(V3D_PCTRS(6)),
+	REGDEF(V3D_PCTR(7)),
+	REGDEF(V3D_PCTRS(7)),
+	REGDEF(V3D_PCTR(8)),
+	REGDEF(V3D_PCTRS(8)),
+	REGDEF(V3D_PCTR(9)),
+	REGDEF(V3D_PCTRS(9)),
+	REGDEF(V3D_PCTR(10)),
+	REGDEF(V3D_PCTRS(10)),
+	REGDEF(V3D_PCTR(11)),
+	REGDEF(V3D_PCTRS(11)),
+	REGDEF(V3D_PCTR(12)),
+	REGDEF(V3D_PCTRS(12)),
+	REGDEF(V3D_PCTR(13)),
+	REGDEF(V3D_PCTRS(13)),
+	REGDEF(V3D_PCTR(14)),
+	REGDEF(V3D_PCTRS(14)),
+	REGDEF(V3D_PCTR(15)),
+	REGDEF(V3D_PCTRS(15)),
 	REGDEF(V3D_DBGE),
 	REGDEF(V3D_FDBGO),
 	REGDEF(V3D_FDBGB),
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 52263b575bdc..b95a0e11cb07 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -42,6 +42,9 @@ extern "C" {
 #define DRM_VC4_GET_TILING                        0x09
 #define DRM_VC4_LABEL_BO                          0x0a
 #define DRM_VC4_GEM_MADVISE                       0x0b
+#define DRM_VC4_PERFMON_CREATE                    0x0c
+#define DRM_VC4_PERFMON_DESTROY                   0x0d
+#define DRM_VC4_PERFMON_GET_VALUES                0x0e
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -55,6 +58,9 @@ extern "C" {
 #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
 #define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
 #define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
+#define DRM_IOCTL_VC4_PERFMON_CREATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_CREATE, struct drm_vc4_perfmon_create)
+#define DRM_IOCTL_VC4_PERFMON_DESTROY     DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_DESTROY, struct drm_vc4_perfmon_destroy)
+#define DRM_IOCTL_VC4_PERFMON_GET_VALUES  DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, struct drm_vc4_perfmon_get_values)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -173,6 +179,15 @@ struct drm_vc4_submit_cl {
 	 * wait ioctl).
 	 */
 	__u64 seqno;
+
+	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
+	__u32 perfmonid;
+
+	/* Unused field to align this struct on 64 bits. Must be set to 0.
+	 * If one ever needs to add an u32 field to this struct, this field
+	 * can be used.
+	 */
+	__u32 pad2;
 };
 
 /**
@@ -308,6 +323,7 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
 #define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
+#define DRM_VC4_PARAM_SUPPORTS_PERFMON		8
 
 struct drm_vc4_get_param {
 	__u32 param;
@@ -352,6 +368,66 @@ struct drm_vc4_gem_madvise {
 	__u32 pad;
 };
 
+enum {
+	VC4_PERFCNT_FEP_VALID_PRIMS_NO_RENDER,
+	VC4_PERFCNT_FEP_VALID_PRIMS_RENDER,
+	VC4_PERFCNT_FEP_CLIPPED_QUADS,
+	VC4_PERFCNT_FEP_VALID_QUADS,
+	VC4_PERFCNT_TLB_QUADS_NOT_PASSING_STENCIL,
+	VC4_PERFCNT_TLB_QUADS_NOT_PASSING_Z_AND_STENCIL,
+	VC4_PERFCNT_TLB_QUADS_PASSING_Z_AND_STENCIL,
+	VC4_PERFCNT_TLB_QUADS_ZERO_COVERAGE,
+	VC4_PERFCNT_TLB_QUADS_NON_ZERO_COVERAGE,
+	VC4_PERFCNT_TLB_QUADS_WRITTEN_TO_COLOR_BUF,
+	VC4_PERFCNT_PLB_PRIMS_OUTSIDE_VIEWPORT,
+	VC4_PERFCNT_PLB_PRIMS_NEED_CLIPPING,
+	VC4_PERFCNT_PSE_PRIMS_REVERSED,
+	VC4_PERFCNT_QPU_TOTAL_IDLE_CYCLES,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_FRAGMENT_SHADING,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_EXEC_VALID_INST,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_TMUS,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD,
+	VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_VARYINGS,
+	VC4_PERFCNT_QPU_TOTAL_INST_CACHE_HIT,
+	VC4_PERFCNT_QPU_TOTAL_INST_CACHE_MISS,
+	VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_HIT,
+	VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_MISS,
+	VC4_PERFCNT_TMU_TOTAL_TEXT_QUADS_PROCESSED,
+	VC4_PERFCNT_TMU_TOTAL_TEXT_CACHE_MISS,
+	VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VDW_STALLED,
+	VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VCD_STALLED,
+	VC4_PERFCNT_L2C_TOTAL_L2_CACHE_HIT,
+	VC4_PERFCNT_L2C_TOTAL_L2_CACHE_MISS,
+	VC4_PERFCNT_NUM_EVENTS,
+};
+
+#define DRM_VC4_MAX_PERF_COUNTERS	16
+
+struct drm_vc4_perfmon_create {
+	__u32 id;
+	__u32 ncounters;
+	__u8 events[DRM_VC4_MAX_PERF_COUNTERS];
+};
+
+struct drm_vc4_perfmon_destroy {
+	__u32 id;
+};
+
+/*
+ * Returns the values of the performance counters tracked by this
+ * perfmon (as an array of ncounters u64 values).
+ *
+ * No implicit synchronization is performed, so the user has to
+ * guarantee that any jobs using this perfmon have already been
+ * completed  (probably by blocking on the seqno returned by the
+ * last exec that used the perfmon).
+ */
+struct drm_vc4_perfmon_get_values {
+	__u32 id;
+	__u64 values_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 60bbade2a63de363a7acdb5e3b0f166a883fe3be Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 17 Jan 2018 13:18:47 +0800
Subject: drm/amdgpu: Expose more GPU sensor queries

Add sub-queries for stable pstate shader/memory clock.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 18 ++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  4 ++++
 2 files changed, 22 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 9f189ab07cdd..b929986dd3d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -758,6 +758,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 				return -EINVAL;
 			}
 			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+			/* get stable pstate sclk in Mhz */
+			if (amdgpu_dpm_read_sensor(adev,
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
+		case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+			/* get stable pstate mclk in Mhz */
+			if (amdgpu_dpm_read_sensor(adev,
+						   AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+						   (void *)&ui32, &ui32_size)) {
+				return -EINVAL;
+			}
+			ui32 /= 100;
+			break;
 		default:
 			DRM_DEBUG_KMS("Invalid request %d\n",
 				      info->sensor_info.type);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4d21191aaed0..1816bd8200d1 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -664,6 +664,10 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_SENSOR_VDDNB		0x6
 	/* Subquery id: Query graphics voltage */
 	#define AMDGPU_INFO_SENSOR_VDDGFX		0x7
+	/* Subquery id: Query GPU stable pstate shader clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK		0x8
+	/* Subquery id: Query GPU stable pstate memory clock */
+	#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK		0x9
 /* Number of VRAM page faults on CPU access. */
 #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
 #define AMDGPU_INFO_VRAM_LOST_COUNTER		0x1F
-- 
cgit v1.2.3


From 6a8bd08d0465b2b8d214007c58598e2c15312296 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Wed, 13 Dec 2017 15:12:57 -0500
Subject: drm/msm: add sudo flag to submit ioctl

This flags cause cmdstream to be executed from the ringbuffer (RB)
instead of IB1.  Normally not something you'd ever want to do, but
it is super useful for firmware debugging.

Hidden behind CAP_SYS_RAWIO and a default=n kconfig option which
depends on EXPERT (and has a suitably scary warning), to prevent
it from being used on accident.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/Kconfig           | 13 +++++++
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 65 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_gem.h         |  1 +
 drivers/gpu/drm/msm/msm_gem_submit.c  |  9 +++++
 include/uapi/drm/msm_drm.h            |  2 ++
 5 files changed, 90 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 99d39b2aefa6..3065cb290aa8 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -28,6 +28,19 @@ config DRM_MSM_REGISTER_LOGGING
 	  that can be parsed by envytools demsm tool.  If enabled, register
 	  logging can be switched on via msm.reglog=y module param.
 
+config DRM_MSM_GPU_SUDO
+	bool "Enable SUDO flag on submits"
+	depends on DRM_MSM && EXPERT
+	default n
+	help
+	  Enable userspace that has CAP_SYS_RAWIO to submit GPU commands
+	  that are run from RB instead of IB1.  This essentially gives
+	  userspace kernel level access, but is useful for firmware
+	  debugging.
+
+	  Only use this if you are a driver developer.  This should *not*
+	  be enabled for production kernels.  If unsure, say N.
+
 config DRM_MSM_HDMI_HDCP
 	bool "Enable HDMI HDCP support in MSM DRM driver"
 	depends on DRM_MSM && QCOM_SCM
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 579c28c8c994..fa08b4897a56 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -140,6 +140,65 @@ static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }
 
+static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+	struct msm_file_private *ctx)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct msm_ringbuffer *ring = submit->ring;
+	struct msm_gem_object *obj;
+	uint32_t *ptr, dwords;
+	unsigned int i;
+
+	for (i = 0; i < submit->nr_cmds; i++) {
+		switch (submit->cmd[i].type) {
+		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+			break;
+		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+			if (priv->lastctx == ctx)
+				break;
+		case MSM_SUBMIT_CMD_BUF:
+			/* copy commands into RB: */
+			obj = submit->bos[submit->cmd[i].idx].obj;
+			dwords = submit->cmd[i].size;
+
+			ptr = msm_gem_get_vaddr(&obj->base);
+
+			/* _get_vaddr() shouldn't fail at this point,
+			 * since we've already mapped it once in
+			 * submit_reloc()
+			 */
+			if (WARN_ON(!ptr))
+				return;
+
+			for (i = 0; i < dwords; i++) {
+				/* normally the OUT_PKTn() would wait
+				 * for space for the packet.  But since
+				 * we just OUT_RING() the whole thing,
+				 * need to call adreno_wait_ring()
+				 * ourself:
+				 */
+				adreno_wait_ring(ring, 1);
+				OUT_RING(ring, ptr[i]);
+			}
+
+			msm_gem_put_vaddr(&obj->base);
+
+			break;
+		}
+	}
+
+	a5xx_flush(gpu, ring);
+	a5xx_preempt_trigger(gpu);
+
+	/* we might not necessarily have a cmd from userspace to
+	 * trigger an event to know that submit has completed, so
+	 * do this manually:
+	 */
+	a5xx_idle(gpu, ring);
+	ring->memptrs->fence = submit->seqno;
+	msm_gpu_retire(gpu);
+}
+
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_file_private *ctx)
 {
@@ -149,6 +208,12 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned int i, ibs = 0;
 
+	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
+		priv->lastctx = NULL;
+		a5xx_submit_in_rb(gpu, submit, ctx);
+		return;
+	}
+
 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 	OUT_RING(ring, 0x02);
 
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 9320e184b48d..c5d9bd3e47a8 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -146,6 +146,7 @@ struct msm_gem_submit {
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
 	bool valid;         /* true if no cmdstream patching needed */
+	bool in_rb;         /* "sudo" mode, copy cmds into RB */
 	struct msm_ringbuffer *ring;
 	unsigned int nr_cmds;
 	unsigned int nr_bos;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b8dc8f96caf2..7bd83e0afa97 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -430,6 +430,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
 		return -EINVAL;
 
+	if (args->flags & MSM_SUBMIT_SUDO) {
+		if (!IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) ||
+		    !capable(CAP_SYS_RAWIO))
+			return -EINVAL;
+	}
+
 	queue = msm_submitqueue_get(ctx, args->queueid);
 	if (!queue)
 		return -ENOENT;
@@ -471,6 +477,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto out_unlock;
 	}
 
+	if (args->flags & MSM_SUBMIT_SUDO)
+		submit->in_rb = true;
+
 	ret = submit_lookup_objects(submit, args, file);
 	if (ret)
 		goto out;
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index bbbaffad772d..c06d0a5bdd80 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -201,10 +201,12 @@ struct drm_msm_gem_submit_bo {
 #define MSM_SUBMIT_NO_IMPLICIT   0x80000000 /* disable implicit sync */
 #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
 #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
+#define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
 #define MSM_SUBMIT_FLAGS                ( \
 		MSM_SUBMIT_NO_IMPLICIT   | \
 		MSM_SUBMIT_FENCE_FD_IN   | \
 		MSM_SUBMIT_FENCE_FD_OUT  | \
+		MSM_SUBMIT_SUDO          | \
 		0)
 
 /* Each cmdstream submit consists of a table of buffers involved, and
-- 
cgit v1.2.3


From 68824bb59d4892457401cdcd89e9bb3eef3ea780 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Thu, 22 Feb 2018 23:42:29 +0200
Subject: drm/uapi: The ctm matrix uses sign-magnitude representation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The documentation for the ctm matrix suggests a two's complement
format, but at least the i915 implementation is using sign-magnitude
instead. And looks like malidp is doing the same. Change the docs
to match the current implementation, and change the type from __s64
to __u64 to drive the point home.

Cc: dri-devel@lists.freedesktop.org
Cc: Mihail Atanassov <mihail.atanassov@arm.com>
Cc: Liviu Dudau <liviu.dudau@arm.com>
Cc: Brian Starkey <brian.starkey@arm.com>
Cc: Mali DP Maintainers <malidp@foss.arm.com>
Cc: Johnson Lin <johnson.lin@intel.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Cc: Shashank Sharma <shashank.sharma@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180222214232.6064-1-ville.syrjala@linux.intel.com
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
---
 include/uapi/drm/drm_mode.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 2c575794fb52..b5d7d9e0eff5 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -598,8 +598,11 @@ struct drm_mode_crtc_lut {
 };
 
 struct drm_color_ctm {
-	/* Conversion matrix in S31.32 format. */
-	__s64 matrix[9];
+	/*
+	 * Conversion matrix in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[9];
 };
 
 struct drm_color_lut {
-- 
cgit v1.2.3


From 2fe4c22c53fc2e3f35be2cd0033cb3d15ebd41b1 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 27 Feb 2018 08:03:56 -0500
Subject: media: rc: lirc does not use LIRC_CAN_SEND_SCANCODE feature

Since commit 02d742f4b209 ("media: lirc: lirc daemon fails to detect raw
IR device"), the feature LIRC_CAN_SEND_SCANCODE is no longer used as it
tripped up lircd. The ability to send scancodes for IR Tx is implied by
LIRC_CAN_SEND_PULSE (i.e. any device that can send can use IR Tx encoders).

So, remove LIRC_CAN_SEND_SCANCODE since it never used. This fixes:

Documentation/output/lirc.h.rst:6: WARNING: undefined label:
lirc-can-send-scancode (if the link has no caption the label must precede
a section header

As this flag was added for kernel 4.16, let's remove it, while not too
late.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 include/uapi/linux/lirc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 4fe580d36e41..f5bf06ecd87d 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -54,7 +54,6 @@
 #define LIRC_CAN_SEND_RAW              LIRC_MODE2SEND(LIRC_MODE_RAW)
 #define LIRC_CAN_SEND_PULSE            LIRC_MODE2SEND(LIRC_MODE_PULSE)
 #define LIRC_CAN_SEND_MODE2            LIRC_MODE2SEND(LIRC_MODE_MODE2)
-#define LIRC_CAN_SEND_SCANCODE         LIRC_MODE2SEND(LIRC_MODE_SCANCODE)
 #define LIRC_CAN_SEND_LIRCCODE         LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
 
 #define LIRC_CAN_SEND_MASK             0x0000003f
-- 
cgit v1.2.3


From 147ccf931e3152e06961bd6d29a5650d25c2f156 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 6 Mar 2018 18:48:47 +0200
Subject: drm/uapi: Deprecate DRM_MODE_PROP_PENDING
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRM_MODE_PROP_PENDING is not used anywhere (except printed out
by libdrm proptest/modetest).

This seems to be yet another thing blindly copied from xrandr.
Quoting from the protocol spec:
"If 'pending' is TRUE, changes made to property values with
 RRChangeOutputProperty will be saved in the pending property value
 and be automatically copied to the current value on the next
 RRSetCrtcConfig request involving the named output. If 'pending' is
 FALSE, changes are copied immediately."

So it was some kind of early idea for atomic property updates.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180306164849.2862-4-ville.syrjala@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 include/uapi/drm/drm_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index b5d7d9e0eff5..50bcf4214ff9 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -363,7 +363,7 @@ struct drm_mode_get_connector {
 	__u32 pad;
 };
 
-#define DRM_MODE_PROP_PENDING	(1<<0)
+#define DRM_MODE_PROP_PENDING	(1<<0) /* deprecated, do not use */
 #define DRM_MODE_PROP_RANGE	(1<<1)
 #define DRM_MODE_PROP_IMMUTABLE	(1<<2)
 #define DRM_MODE_PROP_ENUM	(1<<3) /* enumerated type with text strings */
-- 
cgit v1.2.3


From a446ae2c6e6555048301f2339cfd97b8eed6d0b7 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Tue, 6 Mar 2018 12:28:56 +0000
Subject: drm/i915: add query uAPI

There are a number of information that are readable from hardware
registers and that we would like to make accessible to userspace. One
particular example is the topology of the execution units (how are
execution units grouped in subslices and slices and also which ones
have been fused off for die recovery).

At the moment the GET_PARAM ioctl covers some basic needs, but
generally is only able to return a single value for each defined
parameter. This is a bit problematic with topology descriptions which
are array/maps of available units.

This change introduces a new ioctl that can deal with requests to fill
structures of potentially variable lengths. The user is expected fill
a query with length fields set at 0 on the first call, the kernel then
sets the length fields to the their expected values. A second call to
the kernel with length fields at their expected values will trigger a
copy of the data to the pointed memory locations.

The scope of this uAPI is only to provide information to userspace,
not to allow configuration of the device.

v2: Simplify dispatcher code iteration (Tvrtko)
    Tweak uapi drm_i915_query_item structure (Tvrtko)

v3: Rename pad fields into flags (Chris)
    Return error on flags field != 0 (Chris)
    Only copy length back to userspace in drm_i915_query_item (Chris)

v4: Use array of functions instead of switch (Chris)

v5: More comments in uapi (Tvrtko)
    Return query item errors in length field (All)

v6: Tweak uapi comments style to match the coding style (Lionel)

v7: Add i915_query.h (Joonas)

v8: (Lionel) Change the behavior of the item iterator to report
    invalid queries into the query item rather than stopping the
    iteration. This enables userspace applications to query newer
    items on older kernels and only have failure on the items that are
    not supported.

v9: Edit copyright headers (Joonas)

v10: Typos & comments in uapi (Joonas)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-6-lionel.g.landwerlin@intel.com
---
 drivers/gpu/drm/i915/Makefile     |  1 +
 drivers/gpu/drm/i915/i915_drv.c   |  2 ++
 drivers/gpu/drm/i915/i915_query.c | 50 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_query.h | 15 ++++++++++++
 include/uapi/drm/i915_drm.h       | 46 ++++++++++++++++++++++++++++++++---
 5 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_query.c
 create mode 100644 drivers/gpu/drm/i915/i915_query.h

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1bd9bc5b8c5c..4eee91a3a236 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -69,6 +69,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_timeline.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
+	  i915_query.o \
 	  i915_request.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c594ff5e57d0..d7c4de45644d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
+#include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_uc.h"
@@ -2832,6 +2833,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
new file mode 100644
index 000000000000..5582e6c3234a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_query.h"
+#include <uapi/drm/i915_drm.h>
+
+static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
+					struct drm_i915_query_item *query_item) = {
+};
+
+int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_query *args = data;
+	struct drm_i915_query_item __user *user_item_ptr =
+		u64_to_user_ptr(args->items_ptr);
+	u32 i;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	for (i = 0; i < args->num_items; i++, user_item_ptr++) {
+		struct drm_i915_query_item item;
+		u64 func_idx;
+		int ret;
+
+		if (copy_from_user(&item, user_item_ptr, sizeof(item)))
+			return -EFAULT;
+
+		if (item.query_id == 0)
+			return -EINVAL;
+
+		func_idx = item.query_id - 1;
+
+		if (func_idx < ARRAY_SIZE(i915_query_funcs))
+			ret = i915_query_funcs[func_idx](dev_priv, &item);
+		else
+			ret = -EINVAL;
+
+		/* Only write the length back to userspace if they differ. */
+		if (ret != item.length && put_user(ret, &user_item_ptr->length))
+			return -EFAULT;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_query.h b/drivers/gpu/drm/i915/i915_query.h
new file mode 100644
index 000000000000..31dcef181f63
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_query.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef _I915_QUERY_H_
+#define _I915_QUERY_H_
+
+struct drm_device;
+struct drm_file;
+
+int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+#endif
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 29fa48e4755d..eedd5a23a944 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_OPEN		0x36
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
+#define DRM_I915_QUERY			0x39
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1606,15 +1608,53 @@ struct drm_i915_perf_oa_config {
 	__u32 n_flex_regs;
 
 	/*
-	 * These fields are pointers to tuples of u32 values (register
-	 * address, value). For example the expected length of the buffer
-	 * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+	 * These fields are pointers to tuples of u32 values (register address,
+	 * value). For example the expected length of the buffer pointed by
+	 * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
 	 */
 	__u64 mux_regs_ptr;
 	__u64 boolean_regs_ptr;
 	__u64 flex_regs_ptr;
 };
 
+struct drm_i915_query_item {
+	__u64 query_id;
+
+	/*
+	 * When set to zero by userspace, this is filled with the size of the
+	 * data to be written at the data_ptr pointer. The kernel sets this
+	 * value to a negative value to signal an error on a particular query
+	 * item.
+	 */
+	__s32 length;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Data will be written at the location pointed by data_ptr when the
+	 * value of length matches the length of the data to be written by the
+	 * kernel.
+	 */
+	__u64 data_ptr;
+};
+
+struct drm_i915_query {
+	__u32 num_items;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * This points to an array of num_items drm_i915_query_item structures.
+	 */
+	__u64 items_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From c822e059185585f79b2007b1d2cafacf4264e610 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Tue, 6 Mar 2018 12:28:57 +0000
Subject: drm/i915: expose rcs topology through query uAPI

With the introduction of asymmetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam to tell userspace what subslices
are available. Here we introduce a more detailed way of querying the
Gen's GPU topology that doesn't aggregate numbers.

This is essential for monitoring parts of the GPU with the OA unit,
because counters need to be normalized to the number of
EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
not gives us sufficient information.

The Mesa series making use of this API is :

    https://patchwork.freedesktop.org/series/38795/

As a bonus we can draw representations of the GPU :

    https://imgur.com/a/vuqpa

v2: Rename uapi struct s/_mask/_info/ (Tvrtko)
    Report max_slice/subslice/eus_per_subslice rather than strides (Tvrtko)
    Add uapi macros to read data from *_info structs (Tvrtko)

v3: Use !!(v & DRM_I915_BIT()) for uapi macros instead of custom shifts (Tvrtko)

v4: factorize query item writting (Tvrtko)
    tweak uapi struct/define names (Tvrtko)

v5: Replace ALIGN() macro (Chris)

v6: Updated uapi comments (Tvrtko)
    Moved flags != 0 checks into vfuncs (Tvrtko)

v7: Use access_ok() before copying anything, to avoid overflows (Chris)
    Switch BUG_ON() to GEM_WARN_ON() (Tvrtko)

v8: Tweak uapi comments style to match the coding style (Lionel)

v9: Fix error in comment about computation of enabled subslice (Tvrtko)

v10: Fix/update comments in uAPI (Sagar)

v11: Drop drm_i915_query_(slice|subslice|eu)_info in favor of a single
     drm_i915_query_topology_info (Joonas)

v12: Add subslice_stride/eu_stride in drm_i915_query_topology_info (Joonas)

v13: Fix comment in uAPI (Joonas)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-7-lionel.g.landwerlin@intel.com
---
 drivers/gpu/drm/i915/i915_query.c | 75 +++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       | 62 ++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 5582e6c3234a..3ace929dd90f 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -8,8 +8,83 @@
 #include "i915_query.h"
 #include <uapi/drm/i915_drm.h>
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+			       struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+	struct drm_i915_query_topology_info topo;
+	u32 slice_length, subslice_length, eu_length, total_length;
+
+	if (query_item->flags != 0)
+		return -EINVAL;
+
+	if (sseu->max_slices == 0)
+		return -ENODEV;
+
+	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
+
+	slice_length = sizeof(sseu->slice_mask);
+	subslice_length = sseu->max_slices *
+		DIV_ROUND_UP(sseu->max_subslices,
+			     sizeof(sseu->subslice_mask[0]) * BITS_PER_BYTE);
+	eu_length = sseu->max_slices * sseu->max_subslices *
+		DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
+
+	total_length = sizeof(topo) + slice_length + subslice_length + eu_length;
+
+	if (query_item->length == 0)
+		return total_length;
+
+	if (query_item->length < total_length)
+		return -EINVAL;
+
+	if (copy_from_user(&topo, u64_to_user_ptr(query_item->data_ptr),
+			   sizeof(topo)))
+		return -EFAULT;
+
+	if (topo.flags != 0)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr),
+		       total_length))
+		return -EFAULT;
+
+	memset(&topo, 0, sizeof(topo));
+	topo.max_slices = sseu->max_slices;
+	topo.max_subslices = sseu->max_subslices;
+	topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
+
+	topo.subslice_offset = slice_length;
+	topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
+	topo.eu_offset = slice_length + subslice_length;
+	topo.eu_stride =
+		DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
+			   &topo, sizeof(topo)))
+		return -EFAULT;
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
+			   &sseu->slice_mask, slice_length))
+		return -EFAULT;
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+					   sizeof(topo) + slice_length),
+			   sseu->subslice_mask, subslice_length))
+		return -EFAULT;
+
+	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+					   sizeof(topo) +
+					   slice_length + subslice_length),
+			   sseu->eu_mask, eu_length))
+		return -EFAULT;
+
+	return total_length;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
+	query_topology_info,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index eedd5a23a944..7f5634ce8e88 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1619,6 +1619,7 @@ struct drm_i915_perf_oa_config {
 
 struct drm_i915_query_item {
 	__u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO    1
 
 	/*
 	 * When set to zero by userspace, this is filled with the size of the
@@ -1655,6 +1656,67 @@ struct drm_i915_query {
 	__u64 items_ptr;
 };
 
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ *
+ * data: contains the 3 pieces of information :
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ *   available. The availability of slice X can be queried with the following
+ *   formula :
+ *
+ *           (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice telling
+ *   whether a subslice is available. The availability of subslice Y in slice
+ *   X can be queried with the following formula :
+ *
+ *           (data[subslice_offset +
+ *                 X * subslice_stride +
+ *                 Y / 8] >> (Y % 8)) & 1
+ *
+ * - the EU mask for each subslice in each slice with one bit per EU telling
+ *   whether an EU is available. The availability of EU Z in subslice Y in
+ *   slice X can be queried with the following formula :
+ *
+ *           (data[eu_offset +
+ *                 (X * max_subslices + Y) * eu_stride +
+ *                 Z / 8] >> (Z % 8)) & 1
+ */
+struct drm_i915_query_topology_info {
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u16 flags;
+
+	__u16 max_slices;
+	__u16 max_subslices;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Offset in data[] at which the subslice masks are stored.
+	 */
+	__u16 subslice_offset;
+
+	/*
+	 * Stride at which each of the subslice masks for each slice are
+	 * stored.
+	 */
+	__u16 subslice_stride;
+
+	/*
+	 * Offset in data[] at which the EU masks are stored.
+	 */
+	__u16 eu_offset;
+
+	/*
+	 * Stride at which each of the EU masks for each subslice are stored.
+	 */
+	__u16 eu_stride;
+
+	__u8 data[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 0538aaf927b205662a3f3d641ecbc8b0f3236e2a Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 22 Jan 2018 15:56:11 +0100
Subject: drm/etnaviv: add more minor features fields

Newer GPU cores added yet more feature bits. Make room for them and
let userspace query them.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 36 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 16 ++++++----------
 include/uapi/drm/etnaviv_drm.h        |  6 ++++++
 3 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 260a73a44e69..42ad286d5ec0 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -84,6 +84,30 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.minor_features5;
 		break;
 
+	case ETNAVIV_PARAM_GPU_FEATURES_7:
+		*value = gpu->identity.minor_features6;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_8:
+		*value = gpu->identity.minor_features7;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_9:
+		*value = gpu->identity.minor_features8;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_10:
+		*value = gpu->identity.minor_features9;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_11:
+		*value = gpu->identity.minor_features10;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_12:
+		*value = gpu->identity.minor_features11;
+		break;
+
 	case ETNAVIV_PARAM_GPU_STREAM_COUNT:
 		*value = gpu->identity.stream_count;
 		break;
@@ -823,6 +847,18 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 		   gpu->identity.minor_features4);
 	seq_printf(m, "\t minor_features5: 0x%08x\n",
 		   gpu->identity.minor_features5);
+	seq_printf(m, "\t minor_features6: 0x%08x\n",
+		   gpu->identity.minor_features6);
+	seq_printf(m, "\t minor_features7: 0x%08x\n",
+		   gpu->identity.minor_features7);
+	seq_printf(m, "\t minor_features8: 0x%08x\n",
+		   gpu->identity.minor_features8);
+	seq_printf(m, "\t minor_features9: 0x%08x\n",
+		   gpu->identity.minor_features9);
+	seq_printf(m, "\t minor_features10: 0x%08x\n",
+		   gpu->identity.minor_features10);
+	seq_printf(m, "\t minor_features11: 0x%08x\n",
+		   gpu->identity.minor_features11);
 
 	seq_puts(m, "\tspecs\n");
 	seq_printf(m, "\t stream_count:  %d\n",
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 3b19528330a0..18460df401b7 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -38,21 +38,17 @@ struct etnaviv_chip_identity {
 
 	/* Supported minor feature fields. */
 	u32 minor_features0;
-
-	/* Supported minor feature 1 fields. */
 	u32 minor_features1;
-
-	/* Supported minor feature 2 fields. */
 	u32 minor_features2;
-
-	/* Supported minor feature 3 fields. */
 	u32 minor_features3;
-
-	/* Supported minor feature 4 fields. */
 	u32 minor_features4;
-
-	/* Supported minor feature 5 fields. */
 	u32 minor_features5;
+	u32 minor_features6;
+	u32 minor_features7;
+	u32 minor_features8;
+	u32 minor_features9;
+	u32 minor_features10;
+	u32 minor_features11;
 
 	/* Number of streams supported. */
 	u32 stream_count;
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index e9b997a0ef27..0d5c49dc478c 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -55,6 +55,12 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
 #define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
 #define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
+#define ETNAVIV_PARAM_GPU_FEATURES_7                0x0a
+#define ETNAVIV_PARAM_GPU_FEATURES_8                0x0b
+#define ETNAVIV_PARAM_GPU_FEATURES_9                0x0c
+#define ETNAVIV_PARAM_GPU_FEATURES_10               0x0d
+#define ETNAVIV_PARAM_GPU_FEATURES_11               0x0e
+#define ETNAVIV_PARAM_GPU_FEATURES_12               0x0f
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
-- 
cgit v1.2.3


From 1e09b05386f32efbebb798cf0341eca4b424c960 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Thu, 8 Mar 2018 18:01:24 +0800
Subject: drm/amdgpu: query vram type from atombios

The vram type for dGPU is stored in umc_info while sys mem type
for APU is stored in integratedsysteminfo

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 95 +++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  4 +-
 include/uapi/drm/amdgpu_drm.h                    |  1 +
 4 files changed, 94 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index ff8efd0f8fd5..a0f48cb9b8f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -114,6 +114,9 @@ union igp_info {
 	struct atom_integrated_system_info_v1_11 v11;
 };
 
+union umc_info {
+	struct atom_umc_info_v3_1 v31;
+};
 /*
  * Return vram width from integrated system info table, if available,
  * or 0 if not.
@@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
+					       int atom_mem_type)
+{
+	int vram_type;
+
+	if (adev->flags & AMD_IS_APU) {
+		switch (atom_mem_type) {
+		case Ddr2MemType:
+		case LpDdr2MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR2;
+			break;
+		case Ddr3MemType:
+		case LpDdr3MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR3;
+			break;
+		case Ddr4MemType:
+		case LpDdr4MemType:
+			vram_type = AMDGPU_VRAM_TYPE_DDR4;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	} else {
+		switch (atom_mem_type) {
+		case ATOM_DGPU_VRAM_TYPE_GDDR5:
+			vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+			break;
+		case ATOM_DGPU_VRAM_TYPE_HBM:
+			vram_type = AMDGPU_VRAM_TYPE_HBM;
+			break;
+		default:
+			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+			break;
+		}
+	}
+
+	return vram_type;
+}
+/*
+ * Return vram type from either integrated system info table
+ * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
+ */
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index;
+	u16 data_offset, size;
+	union igp_info *igp_info;
+	union umc_info *umc_info;
+	u8 frev, crev;
+	u8 mem_type;
+
+	if (adev->flags & AMD_IS_APU)
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    integratedsysteminfo);
+	else
+		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+						    umc_info);
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+					  index, &size,
+					  &frev, &crev, &data_offset)) {
+		if (adev->flags & AMD_IS_APU) {
+			igp_info = (union igp_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 11:
+				mem_type = igp_info->v11.memorytype;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		} else {
+			umc_info = (union umc_info *)
+				(mode_info->atom_context->bios + data_offset);
+			switch (crev) {
+			case 1:
+				mem_type = umc_info->v31.vram_type;
+				return convert_atom_mem_type_to_vram_type(adev, mem_type);
+			default:
+				return 0;
+			}
+		}
+	}
+
+	return 0;
+}
+
 union firmware_info {
 	struct atom_firmware_info_v3_1 v31;
 };
@@ -151,10 +242,6 @@ union smu_info {
 	struct atom_smu_info_v3_1 v31;
 };
 
-union umc_info {
-	struct atom_umc_info_v3_1 v31;
-};
-
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
 {
 	struct amdgpu_mode_info *mode_info = &adev->mode_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 288b97e54347..7689c961c4ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
 void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 67cd1fe17649..ceab14f16795 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -836,9 +836,9 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
+	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
-		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
 		} else {
@@ -849,8 +849,6 @@ static int gmc_v9_0_sw_init(void *handle)
 		}
 		break;
 	case CHIP_VEGA10:
-		/* XXX Don't know how to get VRAM type yet. */
-		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
 		/*
 		 * To fulfill 4-level page support,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1816bd8200d1..528f6d041e90 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -806,6 +806,7 @@ struct drm_amdgpu_info_firmware {
 #define AMDGPU_VRAM_TYPE_GDDR5 5
 #define AMDGPU_VRAM_TYPE_HBM   6
 #define AMDGPU_VRAM_TYPE_DDR3  7
+#define AMDGPU_VRAM_TYPE_DDR4  8
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
-- 
cgit v1.2.3


From c7bcbfa4f8d1e0e1078adfe959d4b65542bccf66 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 15 Mar 2018 17:27:46 -0400
Subject: drm/amdkfd: Remove limit on number of GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the number of GPUs is limited by aperture placement options
available on GFX7 and GFX8 hardware. This limitation is not necessary.
Scratch and LDS represent per-work-item and per-work-group storage
respectively. Different work-items and work-groups use the same virtual
address to access their own data. Work running on different GPUs is by
definition in different work-groups (different dispatches, in fact).
That means the same virtual addresses can be used for these apertures
on different GPUs.

Add a new AMDKFD_IOC_GET_PROCESS_APERTURES_NEW ioctl that removes the
artificial limitation on the number of GPUs that can be supported. The
new ioctl allows user mode to query the number of GPUs to allocate
enough memory for all GPUs to be reported.

This deprecates AMDKFD_IOC_GET_PROCESS_APERTURES.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     | 94 ++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 22 +++----
 include/uapi/linux/kfd_ioctl.h               | 27 +++++++-
 3 files changed, 128 insertions(+), 15 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fe24964540b..7d4009418ec3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+
+		if (!kfd_has_process_device_data(p))
+			goto out_unlock;
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd);
+
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+				args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!kfd_has_process_device_data(p)) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+
+		pdd = kfd_get_next_process_device_data(p, pdd);
+	} while (pdd && (nodes < args->num_of_nodes));
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 					void *data)
 {
@@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
 			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 7377513050e6..a06b0100af96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -282,14 +282,14 @@
 	(((uint64_t)(base) & \
 		0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
 
-#define MAKE_SCRATCH_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x100000000L)
+#define MAKE_SCRATCH_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
 
 #define MAKE_SCRATCH_APP_LIMIT(base) \
 	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
-#define MAKE_LDS_APP_BASE(gpu_num) \
-	(((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_BASE() \
+	(((uint64_t)(0x1UL) << 61) + 0x0)
 #define MAKE_LDS_APP_LIMIT(base) \
 	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
@@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			return -1;
 		}
 		/*
-		 * For 64 bit process aperture will be statically reserved in
+		 * For 64 bit process apertures will be statically reserved in
 		 * the x86_64 non canonical process address space
 		 * amdkfd doesn't currently support apertures for 32 bit process
 		 */
@@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
 			pdd->scratch_base = pdd->scratch_limit = 0;
 		} else {
-			/*
-			 * node id couldn't be 0 - the three MSB bits of
-			 * aperture shoudn't be 0
+			/* Same LDS and scratch apertures can be used
+			 * on all GPUs. This allows using more dGPUs
+			 * than placement options for apertures.
 			 */
-			pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
-
+			pdd->lds_base = MAKE_LDS_APP_BASE();
 			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
 			pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
@@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			pdd->gpuvm_limit =
 					MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
 
-			pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
-
+			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
 			pdd->scratch_limit =
 				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
 		}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 111d73ba2d96..52014370e2e5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
 	__u32 pad;
 };
 
-#define NUM_OF_SUPPORTED_GPUS 7
-
 struct kfd_process_device_apertures {
 	__u64 lds_base;		/* from KFD */
 	__u64 lds_limit;		/* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
 	__u32 pad;
 };
 
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
+ */
+#define NUM_OF_SUPPORTED_GPUS 7
 struct kfd_ioctl_get_process_apertures_args {
 	struct kfd_process_device_apertures
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_get_process_apertures_new_args {
+	/* User allocated. Pointer to struct kfd_process_device_apertures
+	 * filled in by Kernel
+	 */
+	__u64 kfd_process_device_apertures_ptr;
+	/* to KFD - indicates amount of memory present in
+	 *  kfd_process_device_apertures_ptr
+	 * from KFD - Number of entries filled by KFD.
+	 */
+	__u32 num_of_nodes;
+	__u32 pad;
+};
+
 #define MAX_ALLOWED_NUM_POINTS    100
 #define MAX_ALLOWED_AW_BUFF_SIZE 4096
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
@@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args {
 #define AMDKFD_IOC_SET_TRAP_HANDLER		\
 		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
 
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW	\
+		AMDKFD_IOWR(0x14,		\
+			struct kfd_ioctl_get_process_apertures_new_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x14
+#define AMDKFD_COMMAND_END		0x15
 
 #endif
-- 
cgit v1.2.3


From 5ec7e02854b3b9b55936c3b44b8acfb85e333f49 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 15 Mar 2018 17:27:51 -0400
Subject: drm/amdkfd: Add ioctls for GPUVM memory management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:
* Fix error handling after kfd_bind_process_to_device in
  kfd_ioctl_map_memory_to_gpu
v3:
* Add ioctl to acquire VM from a DRM FD
v4:
* Return number of successful map/unmap operations in failure cases
* Facilitate partial retry after failed map/unmap
* Added comments with parameter descriptions to new APIs
* Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c        | 377 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h           |   8 +
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |   2 +
 include/uapi/linux/kfd_ioctl.h                  |  97 +++++-
 4 files changed, 483 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7d4009418ec3..a563ff2ca7dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	return 0;
 }
 
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	struct file *drm_file;
+	int ret;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (dev->device_info->needs_iommu_device)
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+			mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		flags);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	return 0;
+
+err_free:
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = peer->kfd2kgd->map_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n",
+			       i, args->n_devices);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd);
+	}
+
+	kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (!peer_pdd) {
+			err = -ENODEV;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = dev->kfd2kgd->unmap_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+	kfree(devices_arr);
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
 			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index aaed005ce1f5..1542807373d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -509,6 +509,14 @@ struct qcm_process_device {
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
+/* 8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+	(((uint64_t)(gpu_id) << 32) + idr_handle)
+#define GET_GPU_ID(handle) (handle >> 32)
+#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
 enum kfd_pdd_bound {
 	PDD_UNBOUND = 0,
 	PDD_BOUND,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1f35c8be2cf..237289a72bb7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -130,6 +130,7 @@ struct tile_config {
 
 /*
  * Allocation flag domains
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_VRAM		(1 << 0)
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
@@ -138,6 +139,7 @@ struct tile_config {
 
 /*
  * Allocation flags attributes/access options.
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
 #define ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 52014370e2e5..b4f5073dbac2 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -286,6 +286,86 @@ struct kfd_ioctl_set_trap_handler_args {
 	__u32 pad;
 };
 
+struct kfd_ioctl_acquire_vm_args {
+	__u32 drm_fd;	/* to KFD */
+	__u32 gpu_id;	/* to KFD */
+};
+
+/* Allocation flags: memory types */
+#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM		(1 << 0)
+#define KFD_IOC_ALLOC_MEM_FLAGS_GTT		(1 << 1)
+#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
+#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
+/* Allocation flags: attributes/access options */
+#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
+#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC		(1 << 29)
+#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE	(1 << 28)
+#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
+
+/* Allocate memory for later SVM (shared virtual memory) mapping.
+ *
+ * @va_addr:     virtual address of the memory to be allocated
+ *               all later mappings on all GPUs will use this address
+ * @size:        size in bytes
+ * @handle:      buffer handle returned to user mode, used to refer to
+ *               this allocation for mapping, unmapping and freeing
+ * @mmap_offset: for CPU-mapping the allocation by mmapping a render node
+ *               for userptrs this is overloaded to specify the CPU address
+ * @gpu_id:      device identifier
+ * @flags:       memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
+ */
+struct kfd_ioctl_alloc_memory_of_gpu_args {
+	__u64 va_addr;		/* to KFD */
+	__u64 size;		/* to KFD */
+	__u64 handle;		/* from KFD */
+	__u64 mmap_offset;	/* to KFD (userptr), from KFD (mmap offset) */
+	__u32 gpu_id;		/* to KFD */
+	__u32 flags;
+};
+
+/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
+ *
+ * @handle: memory handle returned by alloc
+ */
+struct kfd_ioctl_free_memory_of_gpu_args {
+	__u64 handle;		/* to KFD */
+};
+
+/* Map memory to one or more GPUs
+ *
+ * @handle:                memory handle returned by alloc
+ * @device_ids_array_ptr:  array of gpu_ids (__u32 per device)
+ * @n_devices:             number of devices in the array
+ * @n_success:             number of devices mapped successfully
+ *
+ * @n_success returns information to the caller how many devices from
+ * the start of the array have mapped the buffer successfully. It can
+ * be passed into a subsequent retry call to skip those devices. For
+ * the first call the caller should initialize it to 0.
+ *
+ * If the ioctl completes with return code 0 (success), n_success ==
+ * n_devices.
+ */
+struct kfd_ioctl_map_memory_to_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
+/* Unmap memory from one or more GPUs
+ *
+ * same arguments as for mapping
+ */
+struct kfd_ioctl_unmap_memory_from_gpu_args {
+	__u64 handle;			/* to KFD */
+	__u64 device_ids_array_ptr;	/* to KFD */
+	__u32 n_devices;		/* to KFD */
+	__u32 n_success;		/* to/from KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -353,7 +433,22 @@ struct kfd_ioctl_set_trap_handler_args {
 		AMDKFD_IOWR(0x14,		\
 			struct kfd_ioctl_get_process_apertures_new_args)
 
+#define AMDKFD_IOC_ACQUIRE_VM			\
+		AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
+
+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU		\
+		AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
+
+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU		\
+		AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
+
+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU		\
+		AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
+
+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
+		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x15
+#define AMDKFD_COMMAND_END		0x1A
 
 #endif
-- 
cgit v1.2.3


From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 13 Mar 2018 14:51:27 +0900
Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off

When we have a bridge with vlan_filtering on and a vlan device on top of
it, packets would be corrupted in skb_vlan_untag() called from
br_dev_xmit().

The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(),
which makes use of skb->mac_len. In this function mac_len is meant for
handling rx path with vlan devices with reorder_header disabled, but in
tx path mac_len is typically 0 and cannot be used, which is the problem
in this case.

The current code even does not properly handle rx path (skb_vlan_untag()
called from __netif_receive_skb_core()) with reorder_header off actually.

In rx path single tag case, it works as follows:

- Before skb_reorder_vlan_header()

 mac_header                                data
   v                                        v
   +-------------------+-------------+------+----
   |        ETH        |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TYPE |
   +-------------------+-------------+------+----
   <-------- mac_len --------->
                       <------------->
                        to be removed

- After skb_reorder_vlan_header()

            mac_header                     data
                 v                          v
                 +-------------------+------+----
                 |        ETH        | ETH  |
                 |       ADDRS       | TYPE |
                 +-------------------+------+----
                 <-------- mac_len --------->

This is ok, but in rx double tag case, it corrupts packets:

- Before skb_reorder_vlan_header()

 mac_header                                              data
   v                                                      v
   +-------------------+-------------+-------------+------+----
   |        ETH        |    VLAN     |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TPID | TCI  | TYPE |
   +-------------------+-------------+-------------+------+----
   <--------------- mac_len ---------------->
                                     <------------->
                                    should be removed
                       <--------------------------->
                         actually will be removed

- After skb_reorder_vlan_header()

            mac_header                                   data
                 v                                        v
                               +-------------------+------+----
                               |        ETH        | ETH  |
                               |       ADDRS       | TYPE |
                               +-------------------+------+----
                 <--------------- mac_len ---------------->

So, two of vlan tags are both removed while only inner one should be
removed and mac_header (and mac_len) is broken.

skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2),
so use skb->data and skb->mac_header to calculate the right offset.

Reported-by: Brandon Carpenter <brandon.carpenter@cypherpath.com>
Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 net/core/skbuff.c             | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 8bbbcb5cd94b..820de5d222d2 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -30,6 +30,7 @@
  */
 
 #define ETH_ALEN	6		/* Octets in one ethernet addr	 */
+#define ETH_TLEN	2		/* Octets in ethernet type field */
 #define ETH_HLEN	14		/* Total octets in header.	 */
 #define ETH_ZLEN	60		/* Min. octets in frame sans FCS */
 #define ETH_DATA_LEN	1500		/* Max. octets in payload	 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index baf990528943..b103f46ec512 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
+	int mac_len;
+
 	if (skb_cow(skb, skb_headroom(skb)) < 0) {
 		kfree_skb(skb);
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
-		2 * ETH_ALEN);
+	mac_len = skb->data - skb_mac_header(skb);
+	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+		mac_len - VLAN_HLEN - ETH_TLEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
-- 
cgit v1.2.3


From a6618f4aedb2b60932d766bd82ae7ce866e842aa Mon Sep 17 00:00:00 2001
From: Kirill Marinushkin <k.marinushkin@gmail.com>
Date: Mon, 19 Mar 2018 07:11:08 +0100
Subject: ALSA: usb-audio: Fix parsing descriptor of UAC2 processing unit

Currently, the offsets in the UAC2 processing unit descriptor are
calculated incorrectly. It causes an issue when connecting the device which
provides such a feature:

~~~~
[84126.724420] usb 1-1.3.1: invalid Processing Unit descriptor (id 18)
~~~~

After this patch is applied, the UAC2 processing unit inits w/o this error.

Fixes: 23caaf19b11e ("ALSA: usb-mixer: Add support for Audio Class v2.0")
Signed-off-by: Kirill Marinushkin <k.marinushkin@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/linux/usb/audio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index 17a022c5b414..da3315ed1bcd 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -370,7 +370,7 @@ static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_d
 {
 	return (protocol == UAC_VERSION_1) ?
 		desc->baSourceID[desc->bNrInPins + 4] :
-		desc->baSourceID[desc->bNrInPins + 6];
+		2; /* in UAC2, this value is constant */
 }
 
 static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc,
@@ -378,7 +378,7 @@ static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_de
 {
 	return (protocol == UAC_VERSION_1) ?
 		&desc->baSourceID[desc->bNrInPins + 5] :
-		&desc->baSourceID[desc->bNrInPins + 7];
+		&desc->baSourceID[desc->bNrInPins + 6];
 }
 
 static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc,
-- 
cgit v1.2.3


From 3ac952b10dec55e50312340ca4cf8f631944ae17 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 16 Mar 2018 11:04:53 -0500
Subject: drm/amdgpu: add VCN to firmware query interface

Need to be able to query the VCN firmware version from
userspace to determine supported features, etc.

Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 12 ++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e851c66cbb5e..4b7824d30e73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -190,6 +190,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->uvd.fw_version;
 		fw_info->feature = 0;
 		break;
+	case AMDGPU_INFO_FW_VCN:
+		fw_info->ver = adev->vcn.fw_version;
+		fw_info->feature = 0;
+		break;
 	case AMDGPU_INFO_FW_GMC:
 		fw_info->ver = adev->gmc.fw_version;
 		fw_info->feature = 0;
@@ -1198,6 +1202,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 			   i, fw_info.feature, fw_info.ver);
 	}
 
+	/* VCN */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCN;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 	return 0;
 }
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 528f6d041e90..c363b67f2d0a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -618,6 +618,8 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_FW_SOS		0x0c
 	/* Subquery id: Query PSP ASD firmware version */
 	#define AMDGPU_INFO_FW_ASD		0x0d
+	/* Subquery id: Query VCN firmware version */
+	#define AMDGPU_INFO_FW_VCN		0x0e
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
 /* the used VRAM size */
-- 
cgit v1.2.3