From 9e2793f6e4e2ca452457e459f013cc8e6b08a789 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Thu, 14 Jul 2016 14:52:03 +0100 Subject: drm/i915: compile-time consistency check on __EXEC_OBJECT flags Two different sets of flag bits are stored in the 'flags' member of a 'struct drm_i915_gem_exec_object2', and they're defined in two different source files, increasing the risk of an accidental clash. Some flags in this field are supplied by the user; these are defined in i915_drm.h, and they start from the LSB and work up. Other flags are defined in i915_gem_execbuffer, for internal use within that file only; they start from the MSB and work down. So here we add a compile-time check that the two sets of flags do not overlap, which would cause all sorts of confusion. Signed-off-by: Dave Gordon Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1468504324-12690-1-git-send-email-david.s.gordon@intel.com --- include/uapi/drm/i915_drm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d7e81a3886fd..51b9360bb376 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -698,12 +698,13 @@ struct drm_i915_gem_exec_object2 { */ __u64 offset; -#define EXEC_OBJECT_NEEDS_FENCE (1<<0) -#define EXEC_OBJECT_NEEDS_GTT (1<<1) -#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) -#define EXEC_OBJECT_PINNED (1<<4) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) +#define EXEC_OBJECT_PINNED (1<<4) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) __u64 flags; __u64 rsvd1; -- cgit v1.2.3 From 3373ce2eccd56651579b1864fecf98b46fd1cb67 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 1 Jul 2016 17:32:08 +0300 Subject: drm/i915: Give proper names to MOCS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose for each MOCS entry isn't well defined atm. Defining these is important to remove any uncertainty about the use of these entries for example in terms of performance and GPU/CPU coherency. Suggested by Ville. v4: - Rename I915_MOCS_AUTO to I915_MOCS_PTE. (Chris) CC: Rong R Yang CC: Yakui Zhao CC: Ville Syrjälä CC: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1467383528-16142-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------ include/uapi/drm/i915_drm.h | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 927825f5b284..2280c329d37f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -97,7 +97,8 @@ struct drm_i915_mocs_table { * end. */ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { - { /* 0x00000009 */ + [I915_MOCS_UNCACHED] = { + /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x0000003b */ .control_value = LE_CACHEABILITY(LE_WB) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { /* NOTE: the LE_TGT_CACHE is not used on Broxton */ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { - { + [I915_MOCS_UNCACHED] = { /* 0x00000009 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0010 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), }, - { + [I915_MOCS_PTE] = { /* 0x00000038 */ .control_value = LE_CACHEABILITY(LE_PAGETABLE) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { /* 0x0030 */ .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), }, - { + [I915_MOCS_CACHED] = { /* 0x00000039 */ .control_value = LE_CACHEABILITY(LE_UC) | LE_TGT_CACHE(LE_TC_LLC_ELLC) | diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 51b9360bb376..33ce5ff9556a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -62,6 +62,30 @@ extern "C" { #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use -- cgit v1.2.3 From 91b2db6f65fbbb1a6688bcc2e52596b723ea2472 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Aug 2016 16:32:23 +0100 Subject: drm/i915: Pad GTT views of exec objects up to user specified size Our GPUs impose certain requirements upon buffers that depend upon how exactly they are used. Typically this is expressed as that they require a larger surface than would be naively computed by pitch * height. Normally such requirements are hidden away in the userspace driver, but when we accept pointers from strangers and later impose extra conditions on them, the original client allocator has no idea about the monstrosities in the GPU and we require the userspace driver to inform the kernel how many padding pages are required beyond the client allocation. v2: Long time, no see v3: Try an anonymous union for uapi struct compatibility Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-7-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 ++- drivers/gpu/drm/i915/i915_gem.c | 80 ++++++++++++++---------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 17 ++++++- include/uapi/drm/i915_drm.h | 8 ++- 4 files changed, 64 insertions(+), 47 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74a31358fd87..1e1369319326 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,11 +3032,13 @@ void i915_gem_free_object(struct drm_gem_object *obj); int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags); int __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags); @@ -3313,8 +3315,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - return i915_gem_object_pin(obj, &ggtt->base, - alignment, flags | PIN_GLOBAL); + return i915_gem_object_pin(obj, &ggtt->base, 0, alignment, + flags | PIN_GLOBAL); } void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8e150508db5..b4af5d1c0ff5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1692,7 +1692,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); if (ret) goto unlock; @@ -2956,6 +2956,7 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma, * @obj: object to bind * @vm: address space to bind into * @ggtt_view: global gtt view if applicable + * @size: requested size in bytes (can be larger than the VMA) * @alignment: requested alignment * @flags: mask of PIN_* flags to use */ @@ -2963,21 +2964,20 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 fence_alignment, unfenced_alignment; - u32 search_flag, alloc_flag; u64 start, end; - u64 size, fence_size; + u32 search_flag, alloc_flag; struct i915_vma *vma; int ret; if (i915_is_ggtt(vm)) { - u32 view_size; + u32 fence_size, fence_alignment, unfenced_alignment; + u64 view_size; if (WARN_ON(!ggtt_view)) return ERR_PTR(-EINVAL); @@ -2995,48 +2995,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, view_size, obj->tiling_mode, false); - size = flags & PIN_MAPPABLE ? fence_size : view_size; + size = max(size, view_size); + if (flags & PIN_MAPPABLE) + size = max_t(u64, size, fence_size); + + if (alignment == 0) + alignment = flags & PIN_MAPPABLE ? fence_alignment : + unfenced_alignment; + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { + DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", + ggtt_view ? ggtt_view->type : 0, + alignment); + return ERR_PTR(-EINVAL); + } } else { - fence_size = i915_gem_get_gtt_size(dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - true); - unfenced_alignment = - i915_gem_get_gtt_alignment(dev, - obj->base.size, - obj->tiling_mode, - false); - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; + size = max_t(u64, size, obj->base.size); + alignment = 4096; } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; end = vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, ggtt->mappable_end); + end = min_t(u64, end, dev_priv->ggtt.mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - if (alignment == 0) - alignment = flags & PIN_MAPPABLE ? fence_alignment : - unfenced_alignment; - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { - DRM_DEBUG("Invalid object (view type=%u) alignment requested %llx\n", - ggtt_view ? ggtt_view->type : 0, - alignment); - return ERR_PTR(-EINVAL); - } - /* If binding the object/GGTT view requires more space than the entire * aperture has, reject it early before evicting everything in a vain * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, - size, + size, obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", end); return ERR_PTR(-E2BIG); @@ -3530,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_ggtt_pin(obj, view, alignment, + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, view->type == I915_GGTT_VIEW_NORMAL ? PIN_MAPPABLE : 0); if (ret) @@ -3678,12 +3669,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) } static bool -i915_vma_misplaced(struct i915_vma *vma, u64 alignment, u64 flags) +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_gem_object *obj = vma->obj; - if (alignment && - vma->node.start & (alignment - 1)) + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) return true; if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) @@ -3727,6 +3720,7 @@ static int i915_gem_object_do_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *ggtt_view, + u64 size, u64 alignment, u64 flags) { @@ -3754,7 +3748,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; - if (i915_vma_misplaced(vma, alignment, flags)) { + if (i915_vma_misplaced(vma, size, alignment, flags)) { WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," @@ -3775,8 +3769,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, bound = vma ? vma->bound : 0; if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, - flags); + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, + size, alignment, flags); if (IS_ERR(vma)) return PTR_ERR(vma); } else { @@ -3798,17 +3792,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, + u64 size, u64 alignment, u64 flags) { return i915_gem_object_do_pin(obj, vm, i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, - alignment, flags); + size, alignment, flags); } int i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) { @@ -3819,7 +3815,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, BUG_ON(!view); return i915_gem_object_do_pin(obj, &ggtt->base, view, - alignment, flags | PIN_GLOBAL); + size, alignment, flags | PIN_GLOBAL); } void diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 63984c4d8e5a..d2e27e730ecb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -682,10 +682,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); + ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, + entry->alignment, + flags); if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) ret = i915_gem_object_pin(obj, vma->vm, + entry->pad_to_size, entry->alignment, flags & ~PIN_MAPPABLE); if (ret) @@ -748,6 +752,9 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -1091,6 +1098,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 33ce5ff9556a..0f292733cffc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -727,11 +727,15 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS (-(EXEC_OBJECT_PINNED<<1)) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) __u64 flags; - __u64 rsvd1; + union { + __u64 rsvd1; + __u64 pad_to_size; + }; __u64 rsvd2; }; -- cgit v1.2.3 From deeb1519b65a92ca06c8e8554a92df0fdb4d5dea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Aug 2016 10:14:22 +0100 Subject: drm/i915: Document and reject invalid tiling modes Through the GTT interface to the fence registers, we can only handle linear, X and Y tiling. The more esoteric tiling patterns are ignored. Document that the tiling ABI only supports upto Y tiling, and reject any attempts to set a tiling mode other than NONE, X or Y. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1470388464-28458-17-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_tiling.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index c0e01333bddf..6817f69947d9 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -68,6 +68,9 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; + if (tiling_mode > I915_TILING_LAST) + return false; + if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0f292733cffc..452629de7a57 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -926,6 +926,7 @@ struct drm_i915_gem_caching { #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 #define I915_BIT_6_SWIZZLE_9 1 -- cgit v1.2.3