From 9e2793f6e4e2ca452457e459f013cc8e6b08a789 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 14 Jul 2016 14:52:03 +0100 Subject: drm/i915: compile-time consistency check on __EXEC_OBJECT flags Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468504324-12690-1-git-send-email-david.s.gordon@intel.com --- include/uapi/drm/i915_drm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3886fd..51b9360bb376 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -698,12 +698,13 @@ struct drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- cgit v1.2.3 From 3373ce2eccd56651579b1864fecf98b46fd1cb67 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Jul 2016 17:32:08 +0300 Subject: drm/i915: Give proper names to MOCS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. v4: - Rename I915_MOCS_AUTO to I915_MOCS_PTE. (Chris) CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1467383528-16142-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------ include/uapi/drm/i915_drm.h | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f5b284..2280c329d37f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. 
*/ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x00000009 */ + [I915_MOCS_UNCACHED] = { + /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0000003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x00000039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 51b9360bb376..33ce5ff9556a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v1.2.3 From 91b2db6f65fbbb1a6688bcc2e52596b723ea2472 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:23 +0100 Subject: drm/i915: Pad GTT views of exec objects up to user specified size Our GPUs impose certain requirements upon buffers that depend upon how exactly they are used. Typically this is expressed as that they require a larger surface than would be naively computed by pitch * height. 
Normally such requirements are hidden away in the userspace driver, but when we accept pointers from strangers and later impose extra conditions on them, the original client allocator has no idea about the monstrosities in the GPU and we require the userspace driver to inform the kernel how many padding pages are required beyond the client allocation. v2: Long time, no see v3: Try an anonymous union for uapi struct compatibility Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-7-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 ++- drivers/gpu/drm/i915/i915_gem.c | 80 ++++++++++++++---------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++++++- include/uapi/drm/i915_drm.h | 8 ++- 4 files changed, 64 insertions(+), 47 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74a31358fd87..1e1369319326 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags); int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags); @@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - return i915_gem_object_pin(obj, &ggtt->base, - alignment, flags | PIN_GLOBAL); + return i915_gem_object_pin(obj, &ggtt->base, 0, alignment, + flags | PIN_GLOBAL); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8e150508db5..b4af5d1c0ff5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1692,7 +1692,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) goto unlock; @@ -2956,6 +2956,7 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, * @obj: object to bind * @vm: address space to bind into * @ggtt_view: global gtt view if applicable + * @size: requested size in bytes (can be larger than the VMA) * @alignment: requested alignment * @flags: mask of PIN_* flags to use */ @@ -2963,21 +2964,20 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 fence_alignment, unfenced_alignment; - u32 search_flag, alloc_flag; u64 start, end; - u64 size, fence_size; + u32 search_flag, alloc_flag; struct i915_vma *vma; int ret; if (i915_is_ggtt(vm)) { - u32 view_size; + u32 fence_size, fence_alignment, unfenced_alignment; + u64 view_size; if (WARN_ON(!ggtt_view)) return ERR_PTR(-EINVAL); @@ -2995,48 +2995,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, view_size, obj->tiling_mode, false); - size = flags & PIN_MAPPABLE ? 
fence_size : view_size; + size = max(size, view_size); + if (flags & PIN_MAPPABLE) + size = max_t(u64, size, fence_size); + + if (alignment == 0) + alignment = flags & PIN_MAPPABLE ? fence_alignment : + unfenced_alignment; + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { + DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", + ggtt_view ? ggtt_view->type : 0, + alignment); + return ERR_PTR(-EINVAL); + } } else { - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + size = max_t(u64, size, obj->base.size); + alignment = 4096; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; end = vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, ggtt->mappable_end); + end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, - size, + size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); return ERR_PTR(-E2BIG); @@ -3530,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, view->type == I915_GGTT_VIEW_NORMAL ? 
PIN_MAPPABLE : 0); if (ret) @@ -3678,12 +3669,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) } static bool -i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags) +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; - if (alignment && - vma->node.start & (alignment - 1)) + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) return true; if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) @@ -3727,6 +3720,7 @@ static int i915_gem_object_do_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { @@ -3754,7 +3748,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if (i915_vma_misplaced(vma, alignment, flags)) { + if (i915_vma_misplaced(vma, size, alignment, flags)) { WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," @@ -3775,8 +3769,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, bound = vma ? vma->bound : 0; if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, + size, alignment, flags); if (IS_ERR(vma)) return PTR_ERR(vma); } else { @@ -3798,17 +3792,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags) { return i915_gem_object_do_pin(obj, vm, i915_is_ggtt(vm) ? 
&i915_ggtt_view_normal : NULL, - alignment, flags); + size, alignment, flags); } int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) { @@ -3819,7 +3815,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, BUG_ON(!view); return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + size, alignment, flags | PIN_GLOBAL); } void diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 63984c4d8e5a..d2e27e730ecb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); + ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, + entry->alignment, + flags); if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, entry->alignment, flags & ~PIN_MAPPABLE); if (ret) @@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -1091,6 +1098,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 33ce5ff9556a..0f292733cffc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) __u64 flags; - __u64 rsvd1; + union { + __u64 rsvd1; + __u64 pad_to_size; + }; __u64 rsvd2; }; -- cgit v1.2.3 From deeb1519b65a92ca06c8e8554a92df0fdb4d5dea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:22 +0100 Subject: drm/i915: Document and reject invalid tiling modes Through the GTT interface to the fence registers, we can only handle linear, X and Y tiling. The more esoteric tiling patterns are ignored. Document that the tiling ABI only supports upto Y tiling, and reject any attempts to set a tiling mode other than NONE, X or Y. 
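As a rough illustration of the userspace side (a minimal sketch only, assuming a libdrm environment; fd, handle and stride are placeholders supplied by the caller), a SET_TILING request that stays within the documented ABI might look like this:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Sketch: request X tiling for a GEM object. With this patch the kernel
 * refuses any tiling_mode beyond I915_TILING_Y (I915_TILING_LAST), so
 * values other than NONE, X or Y now fail with -EINVAL.
 */
static int set_x_tiling(int fd, uint32_t handle, uint32_t stride)
{
	struct drm_i915_gem_set_tiling arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.tiling_mode = I915_TILING_X;	/* only NONE, X or Y are accepted */
	arg.stride = stride;

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &arg);
}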
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-17-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_tiling.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index c0e01333bddf..6817f69947d9 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; + if (tiling_mode > I915_TILING_LAST) + return false; + if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0f292733cffc..452629de7a57 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -926,6 +926,7 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 #define I915_BIT_6_SWIZZLE_9 1 -- cgit v1.2.3 From 4fea83ff0f61676389b17803365c1e8d2b652183 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 20 Jul 2016 14:44:38 +0800 Subject: drm/amdgpu: expose AMDGPU_GEM_CREATE_VRAM_CLEARED to user space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V2: fix the return value for fill failure and validate bo before filling data Reviewed-by: Christian König Signed-off-by: Flora Cui Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 33 ++++++++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 2 files changed, 35 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 67de19c46ddb..d8e69a7e51f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -340,11 +340,44 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, if (unlikely(r != 0)) { return r; } + + if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { + struct fence *fence; + + if (adev->mman.buffer_funcs_ring == NULL || + !adev->mman.buffer_funcs_ring->ready) { + r = -EBUSY; + goto fail_free; + } + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto fail_free; + + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (unlikely(r != 0)) + goto fail_unreserve; + + amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + amdgpu_bo_fence(bo, fence, false); + amdgpu_bo_unreserve(bo); + fence_put(bo->tbo.moving); + bo->tbo.moving = fence_get(fence); + fence_put(fence); + } *bo_ptr = bo; trace_amdgpu_bo_create(bo); return 0; + +fail_unreserve: + amdgpu_bo_unreserve(bo); +fail_free: + amdgpu_bo_unref(&bo); + return r; } int amdgpu_bo_create(struct amdgpu_device *adev, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 462246aa200e..a902a602490b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -77,6 +77,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) /* Flag that USWC attributes should be used for GTT 
*/ #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From f837297ad82480024d3ad08cd84f6670bcafa862 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 8 Aug 2016 16:23:39 +0900 Subject: drm: Add DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These flags allow userspace to explicitly specify the target vertical blank period when a flip should take effect. v2: * Add new struct drm_mode_crtc_page_flip_target instead of modifying struct drm_mode_crtc_page_flip, to make sure all existing userspace code keeps compiling (Daniel Vetter) Acked-by: Christian König Reviewed-by: Daniel Vetter Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_crtc.c | 48 ++++++++++++++++++++++++++++++++++++++------- drivers/gpu/drm/drm_ioctl.c | 8 ++++++++ include/uapi/drm/drm.h | 1 + include/uapi/drm/drm_mode.h | 39 +++++++++++++++++++++++++++++++++--- 4 files changed, 86 insertions(+), 10 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a316f7336aeb..93b9821f7f24 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -5398,15 +5398,23 @@ out: int drm_mode_page_flip_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct drm_mode_crtc_page_flip *page_flip = data; + struct drm_mode_crtc_page_flip_target *page_flip = data; struct drm_crtc *crtc; struct drm_framebuffer *fb = NULL; struct drm_pending_vblank_event *e = NULL; - u32 target_vblank = 0; + u32 target_vblank = page_flip->sequence; int ret = -EINVAL; - if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS || - page_flip->reserved != 0) + if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS) + return -EINVAL; + + if (page_flip->sequence != 0 && !(page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET)) + return -EINVAL; + + /* Only one of the DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags + * can be specified + */ + if ((page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) == DRM_MODE_PAGE_FLIP_TARGET) return -EINVAL; if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip) @@ -5417,15 +5425,41 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, return -ENOENT; if (crtc->funcs->page_flip_target) { + u32 current_vblank; int r; r = drm_crtc_vblank_get(crtc); if (r) return r; - target_vblank = drm_crtc_vblank_count(crtc) + - !(page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC); - } else if (crtc->funcs->page_flip == NULL) { + current_vblank = drm_crtc_vblank_count(crtc); + + switch (page_flip->flags & DRM_MODE_PAGE_FLIP_TARGET) { + case DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE: + if ((int)(target_vblank - current_vblank) > 1) { + DRM_DEBUG("Invalid absolute flip target %u, " + "must be <= %u\n", target_vblank, + current_vblank + 1); + drm_crtc_vblank_put(crtc); + return -EINVAL; + } + break; + case DRM_MODE_PAGE_FLIP_TARGET_RELATIVE: + if (target_vblank != 0 && target_vblank != 1) { + DRM_DEBUG("Invalid relative flip target %u, " + "must be 0 or 1\n", target_vblank); + drm_crtc_vblank_put(crtc); + return -EINVAL; + } + target_vblank += current_vblank; + break; + default: + target_vblank = current_vblank + + !(page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC); + break; + } + } else if (crtc->funcs->page_flip == NULL || + (page_flip->flags & 
DRM_MODE_PAGE_FLIP_TARGET)) { return -EINVAL; } diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 33af4a5ddca1..0099d2a4d618 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -228,6 +228,7 @@ static int drm_getstats(struct drm_device *dev, void *data, static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_get_cap *req = data; + struct drm_crtc *crtc; req->value = 0; switch (req->capability) { @@ -254,6 +255,13 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_ case DRM_CAP_ASYNC_PAGE_FLIP: req->value = dev->mode_config.async_page_flip; break; + case DRM_CAP_PAGE_FLIP_TARGET: + req->value = 1; + drm_for_each_crtc(crtc, dev) { + if (!crtc->funcs->page_flip_target) + req->value = 0; + } + break; case DRM_CAP_CURSOR_WIDTH: if (dev->mode_config.cursor_width) req->value = dev->mode_config.cursor_width; diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 452675fb55d9..b2c52843bc70 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -646,6 +646,7 @@ struct drm_gem_open { #define DRM_CAP_CURSOR_WIDTH 0x8 #define DRM_CAP_CURSOR_HEIGHT 0x9 #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 49a72659b801..df0e3504c349 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -520,7 +520,13 @@ struct drm_color_lut { #define DRM_MODE_PAGE_FLIP_EVENT 0x01 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 -#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT|DRM_MODE_PAGE_FLIP_ASYNC) +#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 +#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8 +#define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \ + DRM_MODE_PAGE_FLIP_TARGET_RELATIVE) +#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \ + DRM_MODE_PAGE_FLIP_ASYNC | \ + DRM_MODE_PAGE_FLIP_TARGET) /* * Request a page flip on the specified crtc. @@ -543,8 +549,7 @@ struct drm_color_lut { * 'as soon as possible', meaning that it not delay waiting for vblank. * This may cause tearing on the screen. * - * The reserved field must be zero until we figure out something - * clever to use it for. + * The reserved field must be zero. */ struct drm_mode_crtc_page_flip { @@ -555,6 +560,34 @@ struct drm_mode_crtc_page_flip { __u64 user_data; }; +/* + * Request a page flip on the specified crtc. + * + * Same as struct drm_mode_crtc_page_flip, but supports new flags and + * re-purposes the reserved field: + * + * The sequence field must be zero unless either of the + * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When + * the ABSOLUTE flag is specified, the sequence field denotes the absolute + * vblank sequence when the flip should take effect. When the RELATIVE + * flag is specified, the sequence field denotes the relative (to the + * current one when the ioctl is called) vblank sequence when the flip + * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to + * make sure the vblank sequence before the target one has passed before + * calling this ioctl. The purpose of the + * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify + * the target for when code dealing with a page flip runs during a + * vertical blank period. 
+ */ + +struct drm_mode_crtc_page_flip_target { + __u32 crtc_id; + __u32 fb_id; + __u32 flags; + __u32 sequence; + __u64 user_data; +}; + /* create a dumb scanout buffer */ struct drm_mode_create_dumb { __u32 height; -- cgit v1.2.3 From 1255501d8681775d564de45742c6e82b7782b7f5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Aug 2016 09:50:40 +0100 Subject: drm/i915: Embrace the race in busy-ioctl Daniel Vetter proposed a new challenge to the serialisation inside the busy-ioctl that exposed a flaw that could result in us reporting the wrong engine as being busy. If the request is reallocated as we test its busyness and then reassigned to this object by another thread, we would not notice that the test itself was incorrect. We are faced with a choice of using __i915_gem_active_get_request_rcu() to first acquire a reference to the request preventing the race, or to acknowledge the race and accept the limitations upon the accuracy of the busy flags. Note that we guarantee that we never falsely report the object as idle (providing userspace itself doesn't race), and so the most important use of the busy-ioctl and its guarantees are fulfilled. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1471337440-16777-1-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 112 +++++++++++++++++++++++----------------- include/uapi/drm/i915_drm.h | 16 +++++- 2 files changed, 81 insertions(+), 47 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7e08c774a1aa..a8d0f70c22f9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3807,49 +3807,68 @@ static __always_inline unsigned int __busy_set_if_active(const struct i915_gem_active *active, unsigned int (*flag)(unsigned int id)) { - /* For more discussion about the barriers and locking concerns, - * see __i915_gem_active_get_rcu(). - */ - do { - struct drm_i915_gem_request *request; - unsigned int id; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return 0; + struct drm_i915_gem_request *request; - id = request->engine->exec_id; + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; - /* Check that the pointer wasn't reassigned and overwritten. - * - * In __i915_gem_active_get_rcu(), we enforce ordering between - * the first rcu pointer dereference (imposing a - * read-dependency only on access through the pointer) and - * the second lockless access through the memory barrier - * following a successful atomic_inc_not_zero(). Here there - * is no such barrier, and so we must manually insert an - * explicit read barrier to ensure that the following - * access occurs after all the loads through the first - * pointer. - * - * It is worth comparing this sequence with - * raw_write_seqcount_latch() which operates very similarly. - * The challenge here is the visibility of the other CPU - * writes to the reallocated request vs the local CPU ordering. - * Before the other CPU can overwrite the request, it will - * have updated our active->request and gone through a wmb. 
- * During the read here, we want to make sure that the values - * we see have not been overwritten as we do so - and we do - * that by serialising the second pointer check with the writes - * on other other CPUs. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - smp_rmb(); - if (request == rcu_access_pointer(active->request)) - return flag(id); - } while (1); + /* This is racy. See __i915_gem_active_get_rcu() for an in detail + * discussion of how to handle the race correctly, but for reporting + * the busy state we err on the side of potentially reporting the + * wrong engine as being busy (but we guarantee that the result + * is at least self-consistent). + * + * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated + * whilst we are inspecting it, even under the RCU read lock as we are. + * This means that there is a small window for the engine and/or the + * seqno to have been overwritten. The seqno will always be in the + * future compared to the intended, and so we know that if that + * seqno is idle (on whatever engine) our request is idle and the + * return 0 above is correct. + * + * The issue is that if the engine is switched, it is just as likely + * to report that it is busy (but since the switch happened, we know + * the request should be idle). So there is a small chance that a busy + * result is actually the wrong engine. + * + * So why don't we care? + * + * For starters, the busy ioctl is a heuristic that is by definition + * racy. Even with perfect serialisation in the driver, the hardware + * state is constantly advancing - the state we report to the user + * is stale. + * + * The critical information for the busy-ioctl is whether the object + * is idle as userspace relies on that to detect whether its next + * access will stall, or if it has missed submitting commands to + * the hardware allowing the GPU to stall. We never generate a + * false-positive for idleness, thus busy-ioctl is reliable at the + * most fundamental level, and we maintain the guarantee that a + * busy object left to itself will eventually become idle (and stay + * idle!). + * + * We allow ourselves the leeway of potentially misreporting the busy + * state because that is an optimisation heuristic that is constantly + * in flux. Being quickly able to detect the busy/idle state is much + * more important than accurate logging of exactly which engines were + * busy. + * + * For accuracy in reporting the engine, we could use + * + * result = 0; + * request = __i915_gem_active_get_rcu(active); + * if (request) { + * if (!i915_gem_request_completed(request)) + * result = flag(request->engine->exec_id); + * i915_gem_request_put(request); + * } + * + * but that still remains susceptible to both hardware and userspace + * races. So we accept making the result of that race slightly worse, + * given the rarity of the race and its low impact on the result. + */ + return flag(READ_ONCE(request->engine->exec_id)); } static __always_inline unsigned int @@ -3897,11 +3916,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * retired and freed. We take a local copy of the pointer, * but before we add its engine into the busy set, the other * thread reallocates it and assigns it to a task on another - * engine with a fresh and incomplete seqno. - * - * So after we lookup the engine's id, we double check that - * the active request is the same and only then do we add it - * into the busy set. + * engine with a fresh and incomplete seqno. 
Guarding against + * that requires careful serialisation and reference counting, + * i.e. using __i915_gem_active_get_request_rcu(). We don't, + * instead we expect that if the result is busy, which engines + * are busy is not completely reliable - we only guarantee + * that the object was busy. */ rcu_read_lock(); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 452629de7a57..5501fe83ed92 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -855,7 +855,16 @@ struct drm_i915_gem_busy { * having flushed any pending activity), and a non-zero return that * the object is still in-flight on the GPU. (The GPU has not yet * signaled completion for all pending requests that reference the - * object.) + * object.) An object is guaranteed to become idle eventually (so + * long as no new GPU commands are executed upon it). Due to the + * asynchronous nature of the hardware, an object reported + * as busy may become idle before the ioctl is completed. + * + * Furthermore, if the object is busy, which engine is busy is only + * provided as a guide. There are race conditions which prevent the + * report of which engines are busy from being always accurate. + * However, the converse is not true. If the object is idle, the + * result of the ioctl, that all engines are idle, is accurate. * * The returned dword is split into two fields to indicate both * the engines on which the object is being read, and the @@ -878,6 +887,11 @@ struct drm_i915_gem_busy { * execution engines, e.g. multiple media engines, which are * mapped to the same identifier in the EXECBUFFER2 ioctl and * so are not separately reported for busyness. + * + * Caveat emptor: + * Only the boolean result of this query is reliable; that is whether + * the object is idle or busy. The report of which engines are busy + * should be only used as a heuristic. */ __u32 busy; }; -- cgit v1.2.3 From e7893c4bd34b9d2f942d77666656efaa084a3f87 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 26 Jul 2016 14:13:21 +0800 Subject: drm/amdgpu: add shadow bo support V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A shadow bo is the shadow of a bo; it is always placed in GTT and can be used to back up the original bo. V2: reference the shadow parent; the shadow bo will be freed by whoever allocated it.
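A rough kernel-side sketch of how a driver might ask for such a shadow (illustrative only: the helper name is made up, and it assumes the amdgpu_bo_create() signature used elsewhere in this series):

/* Sketch, not part of this patch: allocate a VRAM bo and request a GTT
 * shadow alongside it; adev and size come from the caller.
 */
static int create_vram_bo_with_shadow(struct amdgpu_device *adev,
				      unsigned long size,
				      struct amdgpu_bo **bo_ptr)
{
	int r;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_SHADOW,
			     NULL /* sg */, NULL /* resv */, bo_ptr);
	if (r)
		return r;

	/* (*bo_ptr)->shadow now points at the GTT copy (see bo->shadow above) */
	return 0;
}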
Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 48 ++++++++++++++++++++++++++++-- include/uapi/drm/amdgpu_drm.h | 2 ++ 3 files changed, 48 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c54f5b962b80..26f6028bb72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -505,6 +505,7 @@ struct amdgpu_bo { struct amdgpu_device *adev; struct drm_gem_object gem_base; struct amdgpu_bo *parent; + struct amdgpu_bo *shadow; struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d8e69a7e51f9..278017d3dc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -380,6 +380,37 @@ fail_free: return r; } +static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, + unsigned long size, int byte_align, + struct amdgpu_bo *bo) +{ + struct ttm_placement placement = {0}; + struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + int r; + + if (bo->shadow) + return 0; + + bo->flags |= AMDGPU_GEM_CREATE_SHADOW; + memset(&placements, 0, + (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); + + amdgpu_ttm_placement_init(adev, &placement, + placements, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC); + + r = amdgpu_bo_create_restricted(adev, size, byte_align, true, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, + NULL, &placement, + bo->tbo.resv, + &bo->shadow); + if (!r) + bo->shadow->parent = amdgpu_bo_ref(bo); + + return r; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, @@ -389,6 +420,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, { struct ttm_placement placement = {0}; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + int r; memset(&placements, 0, (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); @@ -396,9 +428,19 @@ int amdgpu_bo_create(struct amdgpu_device *adev, amdgpu_ttm_placement_init(adev, &placement, placements, domain, flags); - return amdgpu_bo_create_restricted(adev, size, byte_align, kernel, - domain, flags, sg, &placement, - resv, bo_ptr); + r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, + domain, flags, sg, &placement, + resv, bo_ptr); + if (r) + return r; + + if (flags & AMDGPU_GEM_CREATE_SHADOW) { + r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); + if (r) + amdgpu_bo_unref(bo_ptr); + } + + return r; } int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index a902a602490b..5aef0b71079b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -79,6 +79,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that create shadow bo(GTT) while allocating vram bo */ +#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit v1.2.3 From 83a59b6338c71425f3159fc9ab31380f237af733 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 17 Aug 2016 23:58:58 +0200 Subject: drm/amdgpu: add AMDGPU_INFO_NUM_EVICTIONS MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For profiling. v2: really bump the minor version Signed-off-by: Marek Olšák Reviewed-by: Christian König Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 58b1db82f589..f5c99a008717 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -53,9 +53,10 @@ * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same * at the end of IBs. * - 3.3.0 - Add VM support for UVD on supported hardware. + * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 3 +#define KMS_DRIVER_MINOR 4 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d942654a1de0..4c8e68a82c47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -373,6 +373,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_NUM_BYTES_MOVED: ui64 = atomic64_read(&adev->num_bytes_moved); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; + case AMDGPU_INFO_NUM_EVICTIONS: + ui64 = atomic64_read(&adev->num_evictions); + return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: ui64 = atomic64_read(&adev->vram_usage); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5aef0b71079b..ae2845fdcb5f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -485,6 +485,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_DEV_INFO 0x16 /* visible vram usage */ #define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff -- cgit v1.2.3 From 4cc6907501ed2393a70ad92a30e00dc54c536e50 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Aug 2016 19:05:19 +0100 Subject: drm/i915: Add I915_PARAM_MMAP_GTT_VERSION to advertise unlimited mmaps Now that we have working partial VMA and faulting support for all objects, including fence support, advertise to userspace that it can take advantage of unlimited GGTT mmaps. v2: Make room in the kerneldoc for a more detailed explanation of the limitations of the GTT mmap interface. 
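For illustration, a minimal userspace query could look like the sketch below (assuming an open i915 DRM fd; as the comment above notes, EINVAL from an older kernel can simply be treated as version 0):

#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Sketch: report the GTT mmap feature level, falling back to 0 on kernels
 * that do not know the parameter.
 */
static int gtt_mmap_version(int fd)
{
	drm_i915_getparam_t gp;
	int value = 0;

	gp.param = I915_PARAM_MMAP_GTT_VERSION;
	gp.value = &value;

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;	/* unknown param: old, limited GTT mmap */

	return value;
}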
Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20160825180519.11341-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 7 ++++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 53 +++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 1 + 4 files changed, 62 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1d2230f7b749..47fe07283d88 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -355,6 +355,13 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_MIN_EU_IN_POOL: value = INTEL_INFO(dev)->min_eu_in_pool; break; + case I915_PARAM_MMAP_GTT_VERSION: + /* Though we've started our numbering from 1, and so class all + * earlier versions as 0, in effect their value is undefined as + * the ioctl will report EINVAL for the unknown param! + */ + value = i915_gem_mmap_gtt_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 04b4fd6c32e4..9fdaf23336df 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3218,6 +3218,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev, uint32_t handle, uint64_t *offset); +int i915_gem_mmap_gtt_version(void); void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 04607d4115d6..d37b44126942 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1679,6 +1679,56 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) return size >> PAGE_SHIFT; } +/** + * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps + * + * A history of the GTT mmap interface: + * + * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to + * aligned and suitable for fencing, and still fit into the available + * mappable space left by the pinned display objects. A classic problem + * we called the page-fault-of-doom where we would ping-pong between + * two objects that could not fit inside the GTT and so the memcpy + * would page one object in at the expense of the other between every + * single byte. + * + * 1 - Objects can be any size, and have any compatible fencing (X Y, or none + * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the + * object is too large for the available space (or simply too large + * for the mappable aperture!), a view is created instead and faulted + * into userspace. (This view is aligned and sized appropriately for + * fenced access.) + * + * Restrictions: + * + * * snoopable objects cannot be accessed via the GTT. It can cause machine + * hangs on some architectures, corruption on others. An attempt to service + * a GTT page fault from a snoopable object will generate a SIGBUS. + * + * * the object must be able to fit into RAM (physical memory, though no + * limited to the mappable aperture). + * + * + * Caveats: + * + * * a new GTT page fault will synchronize rendering from the GPU and flush + * all data to system memory. Subsequent access will not be synchronized. 
+ * + * * all mappings are revoked on runtime device suspend. + * + * * there are only 8, 16 or 32 fence registers to share between all users + * (older machines require fence register for display and blitter access + * as well). Contention of the fence registers will cause the previous users + * to be unmapped and any new access will generate new page faults. + * + * * running out of memory while servicing a fault may generate a SIGBUS, + * rather than the expected SIGSEGV. + */ +int i915_gem_mmap_gtt_version(void) +{ + return 1; +} + /** * i915_gem_fault - fault a page into the GTT * @area: CPU VMA in question @@ -1694,6 +1744,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) * from the GTT and/or fence registers to make room. So performance may * suffer if the GTT working set is large or there are few fence registers * left. + * + * The current feature set supported by i915_gem_fault() and thus GTT mmaps + * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version). */ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 5501fe83ed92..03725fe89859 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -387,6 +387,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_SOFTPIN 37 #define I915_PARAM_HAS_POOLED_EU 38 #define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 typedef struct drm_i915_getparam { __s32 param; -- cgit v1.2.3 From 295d0dafd31c9a26f3d34a9bdc75f226e18fd9a2 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Tue, 24 May 2016 21:02:53 +0800 Subject: drm/amdgpu: Add SI Family information Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + include/uapi/drm/amdgpu_drm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aef9a9ac6280..f95bcb80dde3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1245,6 +1245,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) case CHIP_PITCAIRN: case CHIP_OLAND: case CHIP_HAINAN: + adev->family = AMDGPU_FAMILY_SI; r = si_set_ip_blocks(adev); if (r) return r; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ae2845fdcb5f..d6b5a21f3d3c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -649,6 +649,7 @@ struct drm_amdgpu_info_hw_ip { * Supported GPU families */ #define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ #define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ #define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ #define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ -- cgit v1.2.3 From d9c181e22a0599fa7e27c3717f56bc1b3b020e63 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 23 Apr 2016 10:08:59 -0400 Subject: drm/msm: extend the submit ioctl to pass in flags We'll want to be able to pass in flags, such as asking for explicit fencing, and possibly other things down the road. Fortunately we don't need a full 32b for the pipe-id. So use the upper 16 bits for flags (which could be extended or reduced later if needed, so start adding flags from the high bits). 
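As a small illustration of the intended encoding (a sketch using the MSM_PIPE_ID/MSM_PIPE_FLAGS helpers added below; the fence flag mentioned in the comment only appears in a later patch of this series):

#include <stdint.h>
#include <drm/msm_drm.h>

/* Sketch: the low 16 bits of 'flags' select the pipe, the upper bits carry
 * submit flags.
 */
static void split_submit_flags(uint32_t flags, uint32_t *pipe, uint32_t *submit_flags)
{
	*pipe = MSM_PIPE_ID(flags);		/* e.g. MSM_PIPE_3D0 */
	*submit_flags = MSM_PIPE_FLAGS(flags);	/* e.g. MSM_SUBMIT_FENCE_FD_IN, added later */
}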
Since anything with the upper bits set would not be a valid pipe-id, an old userspace would not set any of the upper bits, and an old kernel would reject it as an invalid pipe-id. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 5 ++++- include/uapi/drm/msm_drm.h | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 9766f9ae4b7d..0b96d85d99f9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -370,7 +370,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, /* for now, we just have 3d pipe.. eventually this would need to * be more clever to dispatch to appropriate gpu module: */ - if (args->pipe != MSM_PIPE_3D0) + if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0) + return -EINVAL; + + if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) return -EINVAL; ret = mutex_lock_interruptible(&dev->struct_mutex); diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 49f778de8e06..913e08cd5ceb 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -42,6 +42,15 @@ extern "C" { #define MSM_PIPE_2D1 0x02 #define MSM_PIPE_3D0 0x10 +/* The pipe-id just uses the lower bits, so can be OR'd with flags in + * the upper 16 bits (which could be extended further, if needed, maybe + * we extend/overload the pipe-id some day to deal with multiple rings, + * but even then I don't think we need the full lower 16 bits). + */ +#define MSM_PIPE_ID_MASK 0xffff +#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) +#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) + /* timeouts are specified in clock-monotonic absolute times (to simplify * restarting interrupted ioctls). The following struct is logically the * same as 'struct timespec' but 32/64b ABI safe. @@ -175,12 +184,16 @@ struct drm_msm_gem_submit_bo { __u64 presumed; /* in/out, presumed buffer address */ }; +/* Valid submit ioctl flags: */ +/* to start, nothing.. */ +#define MSM_SUBMIT_FLAGS 0 + /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. */ struct drm_msm_gem_submit { - __u32 pipe; /* in, MSM_PIPE_x */ + __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ __u32 fence; /* out */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ -- cgit v1.2.3 From f0a42bb5423a1387e54a2d3451a10d4358b8cfb6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 16 Jun 2016 16:08:19 -0400 Subject: drm/msm: submit support for in-fences Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 34 +++++++++++++++++++++++++++++++--- include/uapi/drm/msm_drm.h | 9 +++++++-- 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 7c7a0314a756..d96b2b6898a3 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -11,6 +11,7 @@ config DRM_MSM select TMPFS select QCOM_SCM select SND_SOC_HDMI_CODEC if SND_SOC + select SYNC_FILE default y help DRM/KMS driver for MSM/snapdragon. 
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 0b96d85d99f9..3a4a8ddeb1db 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -15,6 +15,8 @@ * this program. If not, see . */ +#include + #include "msm_drv.h" #include "msm_gpu.h" #include "msm_gem.h" @@ -361,6 +363,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_file_private *ctx = file->driver_priv; struct msm_gem_submit *submit; struct msm_gpu *gpu = priv->gpu; + struct fence *in_fence = NULL; unsigned i; int ret; @@ -394,9 +397,32 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - ret = submit_fence_sync(submit); - if (ret) - goto out; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { + in_fence = sync_file_get_fence(args->fence_fd); + + if (!in_fence) { + ret = -EINVAL; + goto out; + } + + /* TODO if we get an array-fence due to userspace merging multiple + * fences, we need a way to determine if all the backing fences + * are from our own context.. + */ + + if (in_fence->context != gpu->fctx->context) { + ret = fence_wait(in_fence, true); + if (ret) + goto out; + } + + } + + if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) { + ret = submit_fence_sync(submit); + if (ret) + goto out; + } ret = submit_pin_objects(submit); if (ret) @@ -467,6 +493,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, args->fence = submit->fence->seqno; out: + if (in_fence) + fence_put(in_fence); submit_cleanup(submit); if (ret) msm_gem_submit_free(submit); diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 913e08cd5ceb..0402994cdbb7 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -185,8 +185,12 @@ struct drm_msm_gem_submit_bo { }; /* Valid submit ioctl flags: */ -/* to start, nothing.. */ -#define MSM_SUBMIT_FLAGS 0 +#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ +#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ +#define MSM_SUBMIT_FLAGS ( \ + MSM_SUBMIT_NO_IMPLICIT | \ + MSM_SUBMIT_FENCE_FD_IN | \ + 0) /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. 
This allows for conditional execution @@ -199,6 +203,7 @@ struct drm_msm_gem_submit { __u32 nr_cmds; /* in, number of submit_cmd's */ __u64 __user bos; /* in, ptr to array of submit_bo's */ __u64 __user cmds; /* in, ptr to array of submit_cmd's */ + __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN) */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on -- cgit v1.2.3 From 4cd0945901a6dd0190824a98471449df9129d21c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 16 Jun 2016 16:43:49 -0400 Subject: drm/msm: submit support for out-fences Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 25 +++++++++++++++++++++++++ include/uapi/drm/msm_drm.h | 4 +++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 65284febb2f9..3ac14cd1e5b9 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -364,6 +364,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gem_submit *submit; struct msm_gpu *gpu = priv->gpu; struct fence *in_fence = NULL; + struct sync_file *sync_file = NULL; + int out_fence_fd = -1; unsigned i; int ret; @@ -383,6 +385,14 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) return ret; + if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto out_unlock; + } + } + submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; @@ -495,10 +505,23 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; } + if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { + sync_file = sync_file_create(submit->fence); + if (!sync_file) { + ret = -ENOMEM; + goto out; + } + } + msm_gpu_submit(gpu, submit, ctx); args->fence = submit->fence->seqno; + if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; + } + out: if (in_fence) fence_put(in_fence); @@ -506,6 +529,8 @@ out: if (ret) msm_gem_submit_free(submit); out_unlock: + if (ret && (out_fence_fd >= 0)) + put_unused_fd(out_fence_fd); mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 0402994cdbb7..8c51e8a0df89 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -187,9 +187,11 @@ struct drm_msm_gem_submit_bo { /* Valid submit ioctl flags: */ #define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ #define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ +#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ #define MSM_SUBMIT_FLAGS ( \ MSM_SUBMIT_NO_IMPLICIT | \ MSM_SUBMIT_FENCE_FD_IN | \ + MSM_SUBMIT_FENCE_FD_OUT | \ 0) /* Each cmdstream submit consists of a table of buffers involved, and @@ -203,7 +205,7 @@ struct drm_msm_gem_submit { __u32 nr_cmds; /* in, number of submit_cmd's */ __u64 __user bos; /* in, ptr to array of submit_bo's */ __u64 __user cmds; /* in, ptr to array of submit_cmd's */ - __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN) */ + __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on -- cgit v1.2.3
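A hedged userspace sketch of how the two fence-fd directions might be used together (assumptions: an open msm DRM fd, a struct drm_msm_gem_submit already populated with bos/cmds, and the pre-existing DRM_IOCTL_MSM_GEM_SUBMIT ioctl):

#include <xf86drm.h>
#include <drm/msm_drm.h>

/* Sketch: wait on an input fence fd before execution and hand back an
 * output fence fd for the new submission. fence_fd is in/out, so the
 * kernel overwrites the input value with the out-fence on success.
 */
static int submit_with_fences(int fd, struct drm_msm_gem_submit *req,
			      int in_fence_fd, int *out_fence_fd)
{
	int ret;

	req->flags |= MSM_PIPE_3D0 |
		      MSM_SUBMIT_FENCE_FD_IN |
		      MSM_SUBMIT_FENCE_FD_OUT;
	req->fence_fd = in_fence_fd;

	ret = drmIoctl(fd, DRM_IOCTL_MSM_GEM_SUBMIT, req);
	if (ret)
		return ret;

	*out_fence_fd = req->fence_fd;
	return 0;
}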