diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 00:08:22 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 00:08:22 +0400 |
commit | be53bfdb8088e9d1924199cc1a96e113756b1075 (patch) | |
tree | 8c65eb9d82ca4c0f11c17cfdc44d5263820b415b /drivers/gpu/drm/i915/i915_gem.c | |
parent | b2094ef840697bc8ca5d17a83b7e30fad5f1e9fa (diff) | |
parent | 5466c7b1683a23dbbcfb7ee4a71c4f23886001c7 (diff) | |
download | linux-be53bfdb8088e9d1924199cc1a96e113756b1075.tar.xz |
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm main changes from Dave Airlie:
"This is the main drm pull request, I'm probably going to send two more
smaller ones, will explain below.
This contains a patch that is also in the fbdev tree, but it should be
the same patch, it added an API for hot unplugging framebuffer
devices, and I need that API for a new driver.
It also contains some changes to the i2c tree which Jean has acked,
and one change to moorestown platform stuff in x86.
Highlights:
- new drivers: UDL driver for USB displaylink devices, kms only,
should support correct hotplug operations.
- core: i2c speedups + better hotplug support, EDID overriding via
firmware interface - allows user to load a firmware for a broken
monitor/kvm from userspace, it even has documentation for it.
- exynos: new HDMI audio + hdmi 1.4 + virtual output driver
- gma500: code cleanup
- radeon: cleanups, CS optimisations, streamout support and pageflip
fix
- nouveau: NVD9 displayport support + more reclocking work
- i915: re-enabling GMBUS, finish gpu patch (might help hibernation
who knows), missed irq fixes, stencil tiling fixes, interlaced
support, aliasesd PPGTT support for SNB/IVB, swizzling for SNB/IVB,
semaphore fixes
As well as the usual bunch of cleanups and fixes all over the place.
I've got two things I'd like to merge a bit later:
a) AMD support for all their new radeonhd 7000 series GPU and APUs.
AMD dropped this a bit late due to insane internal review
processes, (please AMD just follow Intel and let open source guys
ship stuff early) however I don't want to penalise people who own
this hardware (since its been on sale for 3-4 months and GPU hw
doesn't exactly have a lifetime in years) and consign them to
using closed drivers for longer than necessary. The changes are
well contained and just plug into the driver new gpu functionality
so they should be fairly regression proof. I just want to give
them a bit of a run on the hw AMD kindly sent me.
b) drm prime/dma-buf interface code. This is just infrastructure
code to expose the dma-buf stuff to drm drivers and to userspace.
I'm not planning on pushing any driver support in this cycle
(except maybe exynos), but I'd like to get the infrastructure code
in so for the next cycle I can start getting the driver support
into the individual drivers. We have started driver support for
i915, nouveau and udl along with I think exynos and omap in
staging. However this code relies on the dma-buf tree being
pulled into your tree first since it needs the latest interfaces
from that tree. I'll push to get that tree sent asap.
(oh and any warnings you see in i915 are gcc's fault from what anyone
can see)."
Fix up trivial conflicts in arch/x86/platform/mrst/mrst.c due to the new
msic_thermal_platform_data() thermal function being added next to the
tc35876x_platform_data() i2c device function..
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (326 commits)
drm/i915: use DDC_ADDR instead of hard-coding it
drm/radeon: use DDC_ADDR instead of hard-coding it
drm: remove unneeded redefinition of DDC_ADDR
drm/exynos: added virtual display driver.
drm: allow loading an EDID as firmware to override broken monitor
drm/exynos: enable hdmi audio feature
drm/exynos: add default pixel format for plane
drm/exynos: cleanup exynos_hdmi.h
drm/exynos: add is_local member in exynos_drm_subdrv struct
drm/exynos: add subdrv open/close functions
drm/exynos: remove module of exynos drm subdrv
drm/exynos: release pending pageflip events when closed
drm/exynos: added new funtion to get/put dma address.
drm/exynos: update gem and buffer framework.
drm/exynos: added mode_fixup feature and code clean.
drm/exynos: add HDMI version 1.4 support
drm/exynos: remove exynos_mixer.h
gma500: Fix mmap frambuffer
drm/radeon: Drop radeon_gem_object_(un)pin.
drm/radeon: Restrict offset for legacy display engine.
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 528 |
1 files changed, 286 insertions, 242 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e55badb2d86d..1f441f5c2405 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -58,6 +58,7 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); static int i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc); +static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -258,73 +259,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) obj->tiling_mode != I915_TILING_NONE; } -static inline void -slow_shmem_copy(struct page *dst_page, - int dst_offset, - struct page *src_page, - int src_offset, - int length) -{ - char *dst_vaddr, *src_vaddr; - - dst_vaddr = kmap(dst_page); - src_vaddr = kmap(src_page); - - memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length); - - kunmap(src_page); - kunmap(dst_page); -} - -static inline void -slow_shmem_bit17_copy(struct page *gpu_page, - int gpu_offset, - struct page *cpu_page, - int cpu_offset, - int length, - int is_read) -{ - char *gpu_vaddr, *cpu_vaddr; - - /* Use the unswizzled path if this page isn't affected. */ - if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { - if (is_read) - return slow_shmem_copy(cpu_page, cpu_offset, - gpu_page, gpu_offset, length); - else - return slow_shmem_copy(gpu_page, gpu_offset, - cpu_page, cpu_offset, length); - } - - gpu_vaddr = kmap(gpu_page); - cpu_vaddr = kmap(cpu_page); - - /* Copy the data, XORing A6 with A17 (1). The user already knows he's - * XORing with the other bits (A9 for Y, A9 and A10 for X) - */ - while (length > 0) { - int cacheline_end = ALIGN(gpu_offset + 1, 64); - int this_length = min(cacheline_end - gpu_offset, length); - int swizzled_gpu_offset = gpu_offset ^ 64; - - if (is_read) { - memcpy(cpu_vaddr + cpu_offset, - gpu_vaddr + swizzled_gpu_offset, - this_length); - } else { - memcpy(gpu_vaddr + swizzled_gpu_offset, - cpu_vaddr + cpu_offset, - this_length); - } - cpu_offset += this_length; - gpu_offset += this_length; - length -= this_length; - } - - kunmap(cpu_page); - kunmap(gpu_page); -} - /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. On a @@ -385,6 +319,58 @@ i915_gem_shmem_pread_fast(struct drm_device *dev, return 0; } +static inline int +__copy_to_user_swizzled(char __user *cpu_vaddr, + const char *gpu_vaddr, int gpu_offset, + int length) +{ + int ret, cpu_offset = 0; + + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + ret = __copy_to_user(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + if (ret) + return ret + length; + + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + return 0; +} + +static inline int +__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset, + const char *cpu_vaddr, + int length) +{ + int ret, cpu_offset = 0; + + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + if (ret) + return ret + length; + + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + return 0; +} + /** * This is the fallback shmem pread path, which allocates temporary storage * in kernel space to copy_to_user into outside of the struct_mutex, so we @@ -398,72 +384,34 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_file *file) { struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; - struct mm_struct *mm = current->mm; - struct page **user_pages; + char __user *user_data; ssize_t remain; - loff_t offset, pinned_pages, i; - loff_t first_data_page, last_data_page, num_pages; - int shmem_page_offset; - int data_page_index, data_page_offset; - int page_length; - int ret; - uint64_t data_ptr = args->data_ptr; - int do_bit17_swizzling; + loff_t offset; + int shmem_page_offset, page_length, ret; + int obj_do_bit17_swizzling, page_do_bit17_swizzling; + user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; - /* Pin the user pages containing the data. We can't fault while - * holding the struct mutex, yet we want to hold it while - * dereferencing the user data. - */ - first_data_page = data_ptr / PAGE_SIZE; - last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; - num_pages = last_data_page - first_data_page + 1; + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); - if (user_pages == NULL) - return -ENOMEM; + offset = args->offset; mutex_unlock(&dev->struct_mutex); - down_read(&mm->mmap_sem); - pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, - num_pages, 1, 0, user_pages, NULL); - up_read(&mm->mmap_sem); - mutex_lock(&dev->struct_mutex); - if (pinned_pages < num_pages) { - ret = -EFAULT; - goto out; - } - - ret = i915_gem_object_set_cpu_read_domain_range(obj, - args->offset, - args->size); - if (ret) - goto out; - - do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - offset = args->offset; while (remain > 0) { struct page *page; + char *vaddr; /* Operation in this page * * shmem_page_offset = offset within page in shmem file - * data_page_index = page number in get_user_pages return - * data_page_offset = offset with data_page_index page. * page_length = bytes to copy for this page */ shmem_page_offset = offset_in_page(offset); - data_page_index = data_ptr / PAGE_SIZE - first_data_page; - data_page_offset = offset_in_page(data_ptr); - page_length = remain; if ((shmem_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - shmem_page_offset; - if ((data_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - data_page_offset; page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) { @@ -471,36 +419,38 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, goto out; } - if (do_bit17_swizzling) { - slow_shmem_bit17_copy(page, - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length, - 1); - } else { - slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - page, - shmem_page_offset, - page_length); - } + page_do_bit17_swizzling = obj_do_bit17_swizzling && + (page_to_phys(page) & (1 << 17)) != 0; + + vaddr = kmap(page); + if (page_do_bit17_swizzling) + ret = __copy_to_user_swizzled(user_data, + vaddr, shmem_page_offset, + page_length); + else + ret = __copy_to_user(user_data, + vaddr + shmem_page_offset, + page_length); + kunmap(page); mark_page_accessed(page); page_cache_release(page); + if (ret) { + ret = -EFAULT; + goto out; + } + remain -= page_length; - data_ptr += page_length; + user_data += page_length; offset += page_length; } out: - for (i = 0; i < pinned_pages; i++) { - SetPageDirty(user_pages[i]); - mark_page_accessed(user_pages[i]); - page_cache_release(user_pages[i]); - } - drm_free_large(user_pages); + mutex_lock(&dev->struct_mutex); + /* Fixup: Kill any reinstated backing storage pages */ + if (obj->madv == __I915_MADV_PURGED) + i915_gem_object_truncate(obj); return ret; } @@ -841,71 +791,36 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_file *file) { struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; - struct mm_struct *mm = current->mm; - struct page **user_pages; ssize_t remain; - loff_t offset, pinned_pages, i; - loff_t first_data_page, last_data_page, num_pages; - int shmem_page_offset; - int data_page_index, data_page_offset; - int page_length; - int ret; - uint64_t data_ptr = args->data_ptr; - int do_bit17_swizzling; + loff_t offset; + char __user *user_data; + int shmem_page_offset, page_length, ret; + int obj_do_bit17_swizzling, page_do_bit17_swizzling; + user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; - /* Pin the user pages containing the data. We can't fault while - * holding the struct mutex, and all of the pwrite implementations - * want to hold it while dereferencing the user data. - */ - first_data_page = data_ptr / PAGE_SIZE; - last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; - num_pages = last_data_page - first_data_page + 1; - - user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); - if (user_pages == NULL) - return -ENOMEM; - - mutex_unlock(&dev->struct_mutex); - down_read(&mm->mmap_sem); - pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, - num_pages, 0, 0, user_pages, NULL); - up_read(&mm->mmap_sem); - mutex_lock(&dev->struct_mutex); - if (pinned_pages < num_pages) { - ret = -EFAULT; - goto out; - } - - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) - goto out; - - do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); offset = args->offset; obj->dirty = 1; + mutex_unlock(&dev->struct_mutex); + while (remain > 0) { struct page *page; + char *vaddr; /* Operation in this page * * shmem_page_offset = offset within page in shmem file - * data_page_index = page number in get_user_pages return - * data_page_offset = offset with data_page_index page. * page_length = bytes to copy for this page */ shmem_page_offset = offset_in_page(offset); - data_page_index = data_ptr / PAGE_SIZE - first_data_page; - data_page_offset = offset_in_page(data_ptr); page_length = remain; if ((shmem_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - shmem_page_offset; - if ((data_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - data_page_offset; page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); if (IS_ERR(page)) { @@ -913,34 +828,45 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, goto out; } - if (do_bit17_swizzling) { - slow_shmem_bit17_copy(page, - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length, - 0); - } else { - slow_shmem_copy(page, - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); - } + page_do_bit17_swizzling = obj_do_bit17_swizzling && + (page_to_phys(page) & (1 << 17)) != 0; + + vaddr = kmap(page); + if (page_do_bit17_swizzling) + ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, + user_data, + page_length); + else + ret = __copy_from_user(vaddr + shmem_page_offset, + user_data, + page_length); + kunmap(page); set_page_dirty(page); mark_page_accessed(page); page_cache_release(page); + if (ret) { + ret = -EFAULT; + goto out; + } + remain -= page_length; - data_ptr += page_length; + user_data += page_length; offset += page_length; } out: - for (i = 0; i < pinned_pages; i++) - page_cache_release(user_pages[i]); - drm_free_large(user_pages); + mutex_lock(&dev->struct_mutex); + /* Fixup: Kill any reinstated backing storage pages */ + if (obj->madv == __I915_MADV_PURGED) + i915_gem_object_truncate(obj); + /* and flush dirty cachelines in case the object isn't in the cpu write + * domain anymore. */ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj); + intel_gtt_chipset_flush(); + } return ret; } @@ -996,10 +922,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * pread/pwrite currently are reading and writing from the CPU * perspective, requiring manual detiling by the client. */ - if (obj->phys_obj) + if (obj->phys_obj) { ret = i915_gem_phys_pwrite(dev, obj, args, file); - else if (obj->gtt_space && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + goto out; + } + + if (obj->gtt_space && + obj->base.write_domain != I915_GEM_DOMAIN_CPU) { ret = i915_gem_object_pin(obj, 0, true); if (ret) goto out; @@ -1018,18 +947,24 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, out_unpin: i915_gem_object_unpin(obj); - } else { - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) - goto out; - ret = -EFAULT; - if (!i915_gem_object_needs_bit17_swizzle(obj)) - ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); - if (ret == -EFAULT) - ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); + if (ret != -EFAULT) + goto out; + /* Fall through to the shmfs paths because the gtt paths might + * fail with non-page-backed user pointers (e.g. gtt mappings + * when moving data between textures). */ } + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + goto out; + + ret = -EFAULT; + if (!i915_gem_object_needs_bit17_swizzle(obj)) + ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); + if (ret == -EFAULT) + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); + out: drm_gem_object_unreference(&obj->base); unlock: @@ -1141,7 +1076,6 @@ int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_mmap *args = data; struct drm_gem_object *obj; unsigned long addr; @@ -1153,11 +1087,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (obj == NULL) return -ENOENT; - if (obj->size > dev_priv->mm.gtt_mappable_end) { - drm_gem_object_unreference_unlocked(obj); - return -E2BIG; - } - down_write(¤t->mm->mmap_sem); addr = do_mmap(obj->filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, @@ -1647,6 +1576,28 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, } } +static u32 +i915_gem_get_seqno(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + u32 seqno = dev_priv->next_seqno; + + /* reserve 0 for non-seqno */ + if (++dev_priv->next_seqno == 0) + dev_priv->next_seqno = 1; + + return seqno; +} + +u32 +i915_gem_next_request_seqno(struct intel_ring_buffer *ring) +{ + if (ring->outstanding_lazy_request == 0) + ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); + + return ring->outstanding_lazy_request; +} + int i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, @@ -1654,10 +1605,19 @@ i915_add_request(struct intel_ring_buffer *ring, { drm_i915_private_t *dev_priv = ring->dev->dev_private; uint32_t seqno; + u32 request_ring_position; int was_empty; int ret; BUG_ON(request == NULL); + seqno = i915_gem_next_request_seqno(ring); + + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring, &seqno); if (ret) @@ -1667,6 +1627,7 @@ i915_add_request(struct intel_ring_buffer *ring, request->seqno = seqno; request->ring = ring; + request->tail = request_ring_position; request->emitted_jiffies = jiffies; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); @@ -1681,7 +1642,7 @@ i915_add_request(struct intel_ring_buffer *ring, spin_unlock(&file_priv->mm.lock); } - ring->outstanding_lazy_request = false; + ring->outstanding_lazy_request = 0; if (!dev_priv->mm.suspended) { if (i915_enable_hangcheck) { @@ -1803,7 +1764,7 @@ void i915_gem_reset(struct drm_device *dev) /** * This function clears the request list as sequence numbers are passed. */ -static void +void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; @@ -1831,6 +1792,12 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + */ + ring->last_retired_head = request->tail; list_del(&request->list); i915_gem_request_remove_from_client(request); @@ -1943,7 +1910,8 @@ i915_gem_retire_work_handler(struct work_struct *work) */ int i915_wait_request(struct intel_ring_buffer *ring, - uint32_t seqno) + uint32_t seqno, + bool do_retire) { drm_i915_private_t *dev_priv = ring->dev->dev_private; u32 ier; @@ -2017,17 +1985,12 @@ i915_wait_request(struct intel_ring_buffer *ring, if (atomic_read(&dev_priv->mm.wedged)) ret = -EAGAIN; - if (ret && ret != -ERESTARTSYS) - DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", - __func__, ret, seqno, ring->get_seqno(ring), - dev_priv->next_seqno); - /* Directly dispatch request retiring. While we have the work queue * to handle this, the waiter on a request often wants an associated * buffer to have made it to the inactive list, and we would need * a separate wait queue to handle that. */ - if (ret == 0) + if (ret == 0 && do_retire) i915_gem_retire_requests_ring(ring); return ret; @@ -2051,7 +2014,8 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) * it. */ if (obj->active) { - ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); + ret = i915_wait_request(obj->ring, obj->last_rendering_seqno, + true); if (ret) return ret; } @@ -2089,6 +2053,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { + drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret = 0; if (obj->gtt_space == NULL) @@ -2133,6 +2098,11 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) trace_i915_gem_object_unbind(obj); i915_gem_gtt_unbind_object(obj); + if (obj->has_aliasing_ppgtt_mapping) { + i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); + obj->has_aliasing_ppgtt_mapping = 0; + } + i915_gem_object_put_pages_gtt(obj); list_del_init(&obj->gtt_list); @@ -2172,7 +2142,7 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, return 0; } -static int i915_ring_idle(struct intel_ring_buffer *ring) +static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire) { int ret; @@ -2186,18 +2156,18 @@ static int i915_ring_idle(struct intel_ring_buffer *ring) return ret; } - return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); + return i915_wait_request(ring, i915_gem_next_request_seqno(ring), + do_retire); } -int -i915_gpu_idle(struct drm_device *dev) +int i915_gpu_idle(struct drm_device *dev, bool do_retire) { drm_i915_private_t *dev_priv = dev->dev_private; int ret, i; /* Flush everything onto the inactive list. */ for (i = 0; i < I915_NUM_RINGS; i++) { - ret = i915_ring_idle(&dev_priv->ring[i]); + ret = i915_ring_idle(&dev_priv->ring[i], do_retire); if (ret) return ret; } @@ -2400,7 +2370,8 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, if (!ring_passed_seqno(obj->last_fenced_ring, obj->last_fenced_seqno)) { ret = i915_wait_request(obj->last_fenced_ring, - obj->last_fenced_seqno); + obj->last_fenced_seqno, + true); if (ret) return ret; } @@ -2432,6 +2403,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) if (obj->fence_reg != I915_FENCE_REG_NONE) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + + WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count); i915_gem_clear_fence_reg(obj->base.dev, &dev_priv->fence_regs[obj->fence_reg]); @@ -2456,7 +2429,7 @@ i915_find_fence_reg(struct drm_device *dev, if (!reg->obj) return reg; - if (!reg->obj->pin_count) + if (!reg->pin_count) avail = reg; } @@ -2466,7 +2439,7 @@ i915_find_fence_reg(struct drm_device *dev, /* None available, try to steal one or wait for a user to finish */ avail = first = NULL; list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->obj->pin_count) + if (reg->pin_count) continue; if (first == NULL) @@ -2541,7 +2514,8 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, if (!ring_passed_seqno(obj->last_fenced_ring, reg->setup_seqno)) { ret = i915_wait_request(obj->last_fenced_ring, - reg->setup_seqno); + reg->setup_seqno, + true); if (ret) return ret; } @@ -2560,7 +2534,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj, reg = i915_find_fence_reg(dev, pipelined); if (reg == NULL) - return -ENOSPC; + return -EDEADLK; ret = i915_gem_object_flush_fence(obj, pipelined); if (ret) @@ -2660,6 +2634,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev, list_del_init(®->lru_list); reg->obj = NULL; reg->setup_seqno = 0; + reg->pin_count = 0; } /** @@ -2946,6 +2921,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { + struct drm_device *dev = obj->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (obj->cache_level == cache_level) @@ -2974,6 +2951,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } i915_gem_gtt_rebind_object(obj, cache_level); + if (obj->has_aliasing_ppgtt_mapping) + i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, + obj, cache_level); } if (cache_level == I915_CACHE_NONE) { @@ -3084,10 +3064,13 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) return ret; } + ret = i915_gem_object_wait_rendering(obj); + if (ret) + return ret; + /* Ensure that we invalidate the GPU's caches and TLBs. */ obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; - - return i915_gem_object_wait_rendering(obj); + return 0; } /** @@ -3619,8 +3602,8 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; - if (IS_GEN6(dev) || IS_GEN7(dev)) { - /* On Gen6, we can have the GPU use the LLC (the CPU + if (HAS_LLC(dev)) { + /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than * display scanout are coherent with the CPU in @@ -3710,7 +3693,7 @@ i915_gem_idle(struct drm_device *dev) return 0; } - ret = i915_gpu_idle(dev); + ret = i915_gpu_idle(dev, true); if (ret) { mutex_unlock(&dev->struct_mutex); return ret; @@ -3745,12 +3728,71 @@ i915_gem_idle(struct drm_device *dev) return 0; } +void i915_gem_init_swizzling(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + + if (INTEL_INFO(dev)->gen < 5 || + dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + return; + + I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | + DISP_TILE_SURFACE_SWIZZLING); + + if (IS_GEN5(dev)) + return; + + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); + if (IS_GEN6(dev)) + I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); + else + I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); +} + +void i915_gem_init_ppgtt(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t pd_offset; + struct intel_ring_buffer *ring; + int i; + + if (!dev_priv->mm.aliasing_ppgtt) + return; + + pd_offset = dev_priv->mm.aliasing_ppgtt->pd_offset; + pd_offset /= 64; /* in cachelines, */ + pd_offset <<= 16; + + if (INTEL_INFO(dev)->gen == 6) { + uint32_t ecochk = I915_READ(GAM_ECOCHK); + I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | + ECOCHK_PPGTT_CACHE64B); + I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); + } else if (INTEL_INFO(dev)->gen >= 7) { + I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); + /* GFX_MODE is per-ring on gen7+ */ + } + + for (i = 0; i < I915_NUM_RINGS; i++) { + ring = &dev_priv->ring[i]; + + if (INTEL_INFO(dev)->gen >= 7) + I915_WRITE(RING_MODE_GEN7(ring), + GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); + + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); + } +} + int -i915_gem_init_ringbuffer(struct drm_device *dev) +i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; + i915_gem_init_swizzling(dev); + ret = intel_init_render_ring_buffer(dev); if (ret) return ret; @@ -3769,6 +3811,8 @@ i915_gem_init_ringbuffer(struct drm_device *dev) dev_priv->next_seqno = 1; + i915_gem_init_ppgtt(dev); + return 0; cleanup_bsd_ring: @@ -3806,7 +3850,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); dev_priv->mm.suspended = 0; - ret = i915_gem_init_ringbuffer(dev); + ret = i915_gem_init_hw(dev); if (ret != 0) { mutex_unlock(&dev->struct_mutex); return ret; @@ -4201,7 +4245,7 @@ rescan: * This has a dramatic impact to reduce the number of * OOM-killer events whilst running the GPU aggressively. */ - if (i915_gpu_idle(dev) == 0) + if (i915_gpu_idle(dev, true) == 0) goto rescan; } mutex_unlock(&dev->struct_mutex); |