summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c1
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c3
-rw-r--r--include/uapi/drm/i915_drm.h29
3 files changed, 32 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 507a6f69426f..6c9c2037fadf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -349,6 +349,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN:
+ case I915_PARAM_HAS_EXEC_ASYNC:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c66e90571031..6fd60682bf93 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1111,6 +1111,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
+ if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
+ continue;
+
ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 57093b455db6..6620b6ad76ed 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -397,6 +397,12 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_SCHEDULER 41
#define I915_PARAM_HUC_STATUS 42
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ * See EXEC_OBJECT_ASYNC.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC 43
+
typedef struct drm_i915_getparam {
__s32 param;
/*
@@ -737,8 +743,29 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
#define EXEC_OBJECT_PINNED (1<<4)
#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synhronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC (1<<6)
/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
__u64 flags;
union {