diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 1525 |
1 files changed, 842 insertions, 683 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 274d25de521e..19dbdd7dd564 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -25,9 +25,8 @@ * */ -#include "drmP.h" -#include "drm.h" -#include "i915_drm.h" +#include <drm/drmP.h> +#include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_trace.h" #include "intel_drv.h" @@ -37,12 +36,12 @@ #include <linux/pci.h> #include <linux/dma-buf.h> -static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, unsigned alignment, - bool map_and_fenceable); + bool map_and_fenceable, + bool nonblocking); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -56,6 +55,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, static int i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc); +static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); +static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) @@ -141,7 +142,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) { - return !obj->active; + return obj->gtt_space && !obj->active; } int @@ -180,7 +181,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = 0; mutex_lock(&dev->struct_mutex); - list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) + list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) if (obj->pin_count) pinned += obj->gtt_space->size; mutex_unlock(&dev->struct_mutex); @@ -341,7 +342,7 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, page_length); kunmap_atomic(vaddr); - return ret; + return ret ? -EFAULT : 0; } static void @@ -392,7 +393,7 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, page_length); kunmap(page); - return ret; + return ret ? - EFAULT : 0; } static int @@ -401,7 +402,6 @@ i915_gem_shmem_pread(struct drm_device *dev, struct drm_i915_gem_pread *args, struct drm_file *file) { - struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; char __user *user_data; ssize_t remain; loff_t offset; @@ -410,7 +410,8 @@ i915_gem_shmem_pread(struct drm_device *dev, int hit_slowpath = 0; int prefaulted = 0; int needs_clflush = 0; - int release_page; + struct scatterlist *sg; + int i; user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; @@ -424,16 +425,30 @@ i915_gem_shmem_pread(struct drm_device *dev, * anyway again before the next pread happens. */ if (obj->cache_level == I915_CACHE_NONE) needs_clflush = 1; - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - return ret; + if (obj->gtt_space) { + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + return ret; + } } + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + offset = args->offset; - while (remain > 0) { + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { struct page *page; + if (i < offset >> PAGE_SHIFT) + continue; + + if (remain <= 0) + break; + /* Operation in this page * * shmem_page_offset = offset within page in shmem file @@ -444,18 +459,7 @@ i915_gem_shmem_pread(struct drm_device *dev, if ((shmem_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - shmem_page_offset; - if (obj->pages) { - page = obj->pages[offset >> PAGE_SHIFT]; - release_page = 0; - } else { - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - release_page = 1; - } - + page = sg_page(sg); page_do_bit17_swizzling = obj_do_bit17_swizzling && (page_to_phys(page) & (1 << 17)) != 0; @@ -466,7 +470,6 @@ i915_gem_shmem_pread(struct drm_device *dev, goto next_page; hit_slowpath = 1; - page_cache_get(page); mutex_unlock(&dev->struct_mutex); if (!prefaulted) { @@ -484,16 +487,12 @@ i915_gem_shmem_pread(struct drm_device *dev, needs_clflush); mutex_lock(&dev->struct_mutex); - page_cache_release(page); + next_page: mark_page_accessed(page); - if (release_page) - page_cache_release(page); - if (ret) { - ret = -EFAULT; + if (ret) goto out; - } remain -= page_length; user_data += page_length; @@ -501,6 +500,8 @@ next_page: } out: + i915_gem_object_unpin_pages(obj); + if (hit_slowpath) { /* Fixup: Kill any reinstated backing storage pages */ if (obj->madv == __I915_MADV_PURGED) @@ -606,7 +607,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, char __user *user_data; int page_offset, page_length, ret; - ret = i915_gem_object_pin(obj, 0, true); + ret = i915_gem_object_pin(obj, 0, true, true); if (ret) goto out; @@ -686,7 +687,7 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, page_length); kunmap_atomic(vaddr); - return ret; + return ret ? -EFAULT : 0; } /* Only difference to the fast-path function is that this can handle bit17 @@ -720,7 +721,7 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, page_do_bit17_swizzling); kunmap(page); - return ret; + return ret ? -EFAULT : 0; } static int @@ -729,7 +730,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; ssize_t remain; loff_t offset; char __user *user_data; @@ -738,7 +738,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev, int hit_slowpath = 0; int needs_clflush_after = 0; int needs_clflush_before = 0; - int release_page; + int i; + struct scatterlist *sg; user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; @@ -752,9 +753,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * right away and we therefore have to clflush anyway. */ if (obj->cache_level == I915_CACHE_NONE) needs_clflush_after = 1; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + if (obj->gtt_space) { + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + } } /* Same trick applies for invalidate partially written cachelines before * writing. */ @@ -762,13 +765,25 @@ i915_gem_shmem_pwrite(struct drm_device *dev, && obj->cache_level == I915_CACHE_NONE) needs_clflush_before = 1; + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + offset = args->offset; obj->dirty = 1; - while (remain > 0) { + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { struct page *page; int partial_cacheline_write; + if (i < offset >> PAGE_SHIFT) + continue; + + if (remain <= 0) + break; + /* Operation in this page * * shmem_page_offset = offset within page in shmem file @@ -787,18 +802,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ((shmem_page_offset | page_length) & (boot_cpu_data.x86_clflush_size - 1)); - if (obj->pages) { - page = obj->pages[offset >> PAGE_SHIFT]; - release_page = 0; - } else { - page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - release_page = 1; - } - + page = sg_page(sg); page_do_bit17_swizzling = obj_do_bit17_swizzling && (page_to_phys(page) & (1 << 17)) != 0; @@ -810,26 +814,20 @@ i915_gem_shmem_pwrite(struct drm_device *dev, goto next_page; hit_slowpath = 1; - page_cache_get(page); mutex_unlock(&dev->struct_mutex); - ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, needs_clflush_after); mutex_lock(&dev->struct_mutex); - page_cache_release(page); + next_page: set_page_dirty(page); mark_page_accessed(page); - if (release_page) - page_cache_release(page); - if (ret) { - ret = -EFAULT; + if (ret) goto out; - } remain -= page_length; user_data += page_length; @@ -837,6 +835,8 @@ next_page: } out: + i915_gem_object_unpin_pages(obj); + if (hit_slowpath) { /* Fixup: Kill any reinstated backing storage pages */ if (obj->madv == __I915_MADV_PURGED) @@ -920,10 +920,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->gtt_space && - obj->cache_level == I915_CACHE_NONE && + if (obj->cache_level == I915_CACHE_NONE && obj->tiling_mode == I915_TILING_NONE && - obj->map_and_fenceable && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user @@ -931,7 +929,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. */ } - if (ret == -EFAULT) + if (ret == -EFAULT || ret == -ENOSPC) ret = i915_gem_shmem_pwrite(dev, obj, args, file); out: @@ -941,6 +939,240 @@ unlock: return ret; } +int +i915_gem_check_wedge(struct drm_i915_private *dev_priv, + bool interruptible) +{ + if (atomic_read(&dev_priv->mm.wedged)) { + struct completion *x = &dev_priv->error_completion; + bool recovery_complete; + unsigned long flags; + + /* Give the error handler a chance to run. */ + spin_lock_irqsave(&x->wait.lock, flags); + recovery_complete = x->done > 0; + spin_unlock_irqrestore(&x->wait.lock, flags); + + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. */ + if (!interruptible) + return -EIO; + + /* Recovery complete, but still wedged means reset failure. */ + if (recovery_complete) + return -EIO; + + return -EAGAIN; + } + + return 0; +} + +/* + * Compare seqno against outstanding lazy request. Emit a request if they are + * equal. + */ +static int +i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) +{ + int ret; + + BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); + + ret = 0; + if (seqno == ring->outstanding_lazy_request) + ret = i915_add_request(ring, NULL, NULL); + + return ret; +} + +/** + * __wait_seqno - wait until execution of seqno has finished + * @ring: the ring expected to report seqno + * @seqno: duh! + * @interruptible: do an interruptible wait (normally yes) + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * + * Returns 0 if the seqno was found within the alloted time. Else returns the + * errno with remaining time filled in timeout argument. + */ +static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, + bool interruptible, struct timespec *timeout) +{ + drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct timespec before, now, wait_time={1,0}; + unsigned long timeout_jiffies; + long end; + bool wait_forever = true; + int ret; + + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) + return 0; + + trace_i915_gem_request_wait_begin(ring, seqno); + + if (timeout != NULL) { + wait_time = *timeout; + wait_forever = false; + } + + timeout_jiffies = timespec_to_jiffies(&wait_time); + + if (WARN_ON(!ring->irq_get(ring))) + return -ENODEV; + + /* Record current time in case interrupted by signal, or wedged * */ + getrawmonotonic(&before); + +#define EXIT_COND \ + (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ + atomic_read(&dev_priv->mm.wedged)) + do { + if (interruptible) + end = wait_event_interruptible_timeout(ring->irq_queue, + EXIT_COND, + timeout_jiffies); + else + end = wait_event_timeout(ring->irq_queue, EXIT_COND, + timeout_jiffies); + + ret = i915_gem_check_wedge(dev_priv, interruptible); + if (ret) + end = ret; + } while (end == 0 && wait_forever); + + getrawmonotonic(&now); + + ring->irq_put(ring); + trace_i915_gem_request_wait_end(ring, seqno); +#undef EXIT_COND + + if (timeout) { + struct timespec sleep_time = timespec_sub(now, before); + *timeout = timespec_sub(*timeout, sleep_time); + } + + switch (end) { + case -EIO: + case -EAGAIN: /* Wedged */ + case -ERESTARTSYS: /* Signal */ + return (int)end; + case 0: /* Timeout */ + if (timeout) + set_normalized_timespec(timeout, 0, 0); + return -ETIME; + default: /* Completed */ + WARN_ON(end < 0); /* We're not aware of other errors */ + return 0; + } +} + +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int +i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + bool interruptible = dev_priv->mm.interruptible; + int ret; + + BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + BUG_ON(seqno == 0); + + ret = i915_gem_check_wedge(dev_priv, interruptible); + if (ret) + return ret; + + ret = i915_gem_check_olr(ring, seqno); + if (ret) + return ret; + + return __wait_seqno(ring, seqno, interruptible, NULL); +} + +/** + * Ensures that all rendering to the object has completed and the object is + * safe to unbind from the GTT or access from the CPU. + */ +static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) +{ + struct intel_ring_buffer *ring = obj->ring; + u32 seqno; + int ret; + + seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; + if (seqno == 0) + return 0; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + i915_gem_retire_requests_ring(ring); + + /* Manually manage the write flush as we may have not yet + * retired the buffer. + */ + if (obj->last_write_seqno && + i915_seqno_passed(seqno, obj->last_write_seqno)) { + obj->last_write_seqno = 0; + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + return 0; +} + +/* A nonblocking variant of the above wait. This is a highly dangerous routine + * as the object state may change during this call. + */ +static __must_check int +i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, + bool readonly) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring = obj->ring; + u32 seqno; + int ret; + + BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + BUG_ON(!dev_priv->mm.interruptible); + + seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; + if (seqno == 0) + return 0; + + ret = i915_gem_check_wedge(dev_priv, true); + if (ret) + return ret; + + ret = i915_gem_check_olr(ring, seqno); + if (ret) + return ret; + + mutex_unlock(&dev->struct_mutex); + ret = __wait_seqno(ring, seqno, true, NULL); + mutex_lock(&dev->struct_mutex); + + i915_gem_retire_requests_ring(ring); + + /* Manually manage the write flush as we may have not yet + * retired the buffer. + */ + if (obj->last_write_seqno && + i915_seqno_passed(seqno, obj->last_write_seqno)) { + obj->last_write_seqno = 0; + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + return ret; +} + /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. @@ -978,6 +1210,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto unlock; } + /* Try to flush the object off the GPU without holding the lock. + * We will repeat the flush holding the lock in the normal manner + * to catch cases where we are gazumped. + */ + ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); + if (ret) + goto unref; + if (read_domains & I915_GEM_DOMAIN_GTT) { ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -991,6 +1231,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); } +unref: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); @@ -1110,7 +1351,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto unlock; } if (!obj->gtt_space) { - ret = i915_gem_object_bind_to_gtt(obj, 0, true); + ret = i915_gem_object_bind_to_gtt(obj, 0, true, false); if (ret) goto unlock; @@ -1158,10 +1399,16 @@ out: case 0: case -ERESTARTSYS: case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. + */ return VM_FAULT_NOPAGE; case -ENOMEM: return VM_FAULT_OOM; default: + WARN_ON_ONCE(ret); return VM_FAULT_SIGBUS; } } @@ -1271,6 +1518,42 @@ i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, return i915_gem_get_gtt_size(dev, size, tiling_mode); } +static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + int ret; + + if (obj->base.map_list.map) + return 0; + + ret = drm_gem_create_mmap_offset(&obj->base); + if (ret != -ENOSPC) + return ret; + + /* Badly fragmented mmap space? The only way we can recover + * space is by destroying unwanted objects. We can't randomly release + * mmap_offsets as userspace expects them to be persistent for the + * lifetime of the objects. The closest we can is to release the + * offsets on purgeable objects by truncating it and marking it purged, + * which prevents userspace from ever using that object again. + */ + i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); + ret = drm_gem_create_mmap_offset(&obj->base); + if (ret != -ENOSPC) + return ret; + + i915_gem_shrink_all(dev_priv); + return drm_gem_create_mmap_offset(&obj->base); +} + +static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) +{ + if (!obj->base.map_list.map) + return; + + drm_gem_free_mmap_offset(&obj->base); +} + int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, @@ -1302,11 +1585,9 @@ i915_gem_mmap_gtt(struct drm_file *file, goto out; } - if (!obj->base.map_list.map) { - ret = drm_gem_create_mmap_offset(&obj->base); - if (ret) - goto out; - } + ret = i915_gem_object_create_mmap_offset(obj); + if (ret) + goto out; *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; @@ -1341,83 +1622,245 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); } -int -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, - gfp_t gfpmask) +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_i915_gem_object *obj) { - int page_count, i; - struct address_space *mapping; struct inode *inode; - struct page *page; - if (obj->pages || obj->sg_table) - return 0; + i915_gem_object_free_mmap_offset(obj); - /* Get the list of pages out of our struct file. They'll be pinned - * at this point until we release them. - */ - page_count = obj->base.size / PAGE_SIZE; - BUG_ON(obj->pages != NULL); - obj->pages = drm_malloc_ab(page_count, sizeof(struct page *)); - if (obj->pages == NULL) - return -ENOMEM; + if (obj->base.filp == NULL) + return; + /* Our goal here is to return as much of the memory as + * is possible back to the system as we are called from OOM. + * To do this we must instruct the shmfs to drop all of its + * backing pages, *now*. + */ inode = obj->base.filp->f_path.dentry->d_inode; - mapping = inode->i_mapping; - gfpmask |= mapping_gfp_mask(mapping); - - for (i = 0; i < page_count; i++) { - page = shmem_read_mapping_page_gfp(mapping, i, gfpmask); - if (IS_ERR(page)) - goto err_pages; - - obj->pages[i] = page; - } - - if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_do_bit_17_swizzle(obj); - - return 0; + shmem_truncate_range(inode, 0, (loff_t)-1); -err_pages: - while (i--) - page_cache_release(obj->pages[i]); + obj->madv = __I915_MADV_PURGED; +} - drm_free_large(obj->pages); - obj->pages = NULL; - return PTR_ERR(page); +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) +{ + return obj->madv == I915_MADV_DONTNEED; } static void i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) { int page_count = obj->base.size / PAGE_SIZE; - int i; - - if (!obj->pages) - return; + struct scatterlist *sg; + int ret, i; BUG_ON(obj->madv == __I915_MADV_PURGED); + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) { + /* In the event of a disaster, abandon all caches and + * hope for the best. + */ + WARN_ON(ret != -EIO); + i915_gem_clflush_object(obj); + obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; + } + if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj); if (obj->madv == I915_MADV_DONTNEED) obj->dirty = 0; - for (i = 0; i < page_count; i++) { + for_each_sg(obj->pages->sgl, sg, page_count, i) { + struct page *page = sg_page(sg); + if (obj->dirty) - set_page_dirty(obj->pages[i]); + set_page_dirty(page); if (obj->madv == I915_MADV_WILLNEED) - mark_page_accessed(obj->pages[i]); + mark_page_accessed(page); - page_cache_release(obj->pages[i]); + page_cache_release(page); } obj->dirty = 0; - drm_free_large(obj->pages); + sg_free_table(obj->pages); + kfree(obj->pages); +} + +static int +i915_gem_object_put_pages(struct drm_i915_gem_object *obj) +{ + const struct drm_i915_gem_object_ops *ops = obj->ops; + + if (obj->pages == NULL) + return 0; + + BUG_ON(obj->gtt_space); + + if (obj->pages_pin_count) + return -EBUSY; + + ops->put_pages(obj); obj->pages = NULL; + + list_del(&obj->gtt_list); + if (i915_gem_object_is_purgeable(obj)) + i915_gem_object_truncate(obj); + + return 0; +} + +static long +i915_gem_purge(struct drm_i915_private *dev_priv, long target) +{ + struct drm_i915_gem_object *obj, *next; + long count = 0; + + list_for_each_entry_safe(obj, next, + &dev_priv->mm.unbound_list, + gtt_list) { + if (i915_gem_object_is_purgeable(obj) && + i915_gem_object_put_pages(obj) == 0) { + count += obj->base.size >> PAGE_SHIFT; + if (count >= target) + return count; + } + } + + list_for_each_entry_safe(obj, next, + &dev_priv->mm.inactive_list, + mm_list) { + if (i915_gem_object_is_purgeable(obj) && + i915_gem_object_unbind(obj) == 0 && + i915_gem_object_put_pages(obj) == 0) { + count += obj->base.size >> PAGE_SHIFT; + if (count >= target) + return count; + } + } + + return count; +} + +static void +i915_gem_shrink_all(struct drm_i915_private *dev_priv) +{ + struct drm_i915_gem_object *obj, *next; + + i915_gem_evict_everything(dev_priv->dev); + + list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list) + i915_gem_object_put_pages(obj); +} + +static int +i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + int page_count, i; + struct address_space *mapping; + struct sg_table *st; + struct scatterlist *sg; + struct page *page; + gfp_t gfp; + + /* Assert that the object is not currently in any GPU domain. As it + * wasn't in the GTT, there shouldn't be any way it could have been in + * a GPU cache + */ + BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); + BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + + page_count = obj->base.size / PAGE_SIZE; + if (sg_alloc_table(st, page_count, GFP_KERNEL)) { + sg_free_table(st); + kfree(st); + return -ENOMEM; + } + + /* Get the list of pages out of our struct file. They'll be pinned + * at this point until we release them. + * + * Fail silently without starting the shrinker + */ + mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; + gfp = mapping_gfp_mask(mapping); + gfp |= __GFP_NORETRY | __GFP_NOWARN; + gfp &= ~(__GFP_IO | __GFP_WAIT); + for_each_sg(st->sgl, sg, page_count, i) { + page = shmem_read_mapping_page_gfp(mapping, i, gfp); + if (IS_ERR(page)) { + i915_gem_purge(dev_priv, page_count); + page = shmem_read_mapping_page_gfp(mapping, i, gfp); + } + if (IS_ERR(page)) { + /* We've tried hard to allocate the memory by reaping + * our own buffer, now let the real VM do its job and + * go down in flames if truly OOM. + */ + gfp &= ~(__GFP_NORETRY | __GFP_NOWARN); + gfp |= __GFP_IO | __GFP_WAIT; + + i915_gem_shrink_all(dev_priv); + page = shmem_read_mapping_page_gfp(mapping, i, gfp); + if (IS_ERR(page)) + goto err_pages; + + gfp |= __GFP_NORETRY | __GFP_NOWARN; + gfp &= ~(__GFP_IO | __GFP_WAIT); + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + } + + if (i915_gem_object_needs_bit17_swizzle(obj)) + i915_gem_object_do_bit_17_swizzle(obj); + + obj->pages = st; + return 0; + +err_pages: + for_each_sg(st->sgl, sg, i, page_count) + page_cache_release(sg_page(sg)); + sg_free_table(st); + kfree(st); + return PTR_ERR(page); +} + +/* Ensure that the associated pages are gathered from the backing storage + * and pinned into our object. i915_gem_object_get_pages() may be called + * multiple times before they are released by a single call to + * i915_gem_object_put_pages() - once the pages are no longer referenced + * either as a result of memory pressure (reaping pages under the shrinker) + * or as the object is itself released. + */ +int +i915_gem_object_get_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + const struct drm_i915_gem_object_ops *ops = obj->ops; + int ret; + + if (obj->pages) + return 0; + + BUG_ON(obj->pages_pin_count); + + ret = ops->get_pages(obj); + if (ret) + return ret; + + list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); + return 0; } void @@ -1441,7 +1884,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); list_move_tail(&obj->ring_list, &ring->active_list); - obj->last_rendering_seqno = seqno; + obj->last_read_seqno = seqno; if (obj->fenced_gpu_access) { obj->last_fenced_seqno = seqno; @@ -1458,97 +1901,35 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, } static void -i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) -{ - list_del_init(&obj->ring_list); - obj->last_rendering_seqno = 0; - obj->last_fenced_seqno = 0; -} - -static void -i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) +i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = dev->dev_private; + BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); BUG_ON(!obj->active); - list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); - - i915_gem_object_move_off_active(obj); -} -static void -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; + if (obj->pin_count) /* are we a framebuffer? */ + intel_mark_fb_idle(obj); list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - BUG_ON(!list_empty(&obj->gpu_write_list)); - BUG_ON(!obj->active); + list_del_init(&obj->ring_list); obj->ring = NULL; - i915_gem_object_move_off_active(obj); + obj->last_read_seqno = 0; + obj->last_write_seqno = 0; + obj->base.write_domain = 0; + + obj->last_fenced_seqno = 0; obj->fenced_gpu_access = false; obj->active = 0; - obj->pending_gpu_write = false; drm_gem_object_unreference(&obj->base); WARN_ON(i915_verify_lists(dev)); } -/* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) -{ - struct inode *inode; - - /* Our goal here is to return as much of the memory as - * is possible back to the system as we are called from OOM. - * To do this we must instruct the shmfs to drop all of its - * backing pages, *now*. - */ - inode = obj->base.filp->f_path.dentry->d_inode; - shmem_truncate_range(inode, 0, (loff_t)-1); - - if (obj->base.map_list.map) - drm_gem_free_mmap_offset(&obj->base); - - obj->madv = __I915_MADV_PURGED; -} - -static inline int -i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) -{ - return obj->madv == I915_MADV_DONTNEED; -} - -static void -i915_gem_process_flushing_list(struct intel_ring_buffer *ring, - uint32_t flush_domains) -{ - struct drm_i915_gem_object *obj, *next; - - list_for_each_entry_safe(obj, next, - &ring->gpu_write_list, - gpu_write_list) { - if (obj->base.write_domain & flush_domains) { - uint32_t old_write_domain = obj->base.write_domain; - - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); - i915_gem_object_move_to_active(obj, ring, - i915_gem_next_request_seqno(ring)); - - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - old_write_domain); - } - } -} - static u32 i915_gem_get_seqno(struct drm_device *dev) { @@ -1589,15 +1970,16 @@ i915_add_request(struct intel_ring_buffer *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - if (ring->gpu_caches_dirty) { - ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; - ring->gpu_caches_dirty = false; + if (request == NULL) { + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; } - BUG_ON(request == NULL); seqno = i915_gem_next_request_seqno(ring); /* Record the position of the start of the request so that @@ -1608,8 +1990,10 @@ i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring, &seqno); - if (ret) - return ret; + if (ret) { + kfree(request); + return ret; + } trace_i915_gem_request_add(ring, seqno); @@ -1619,6 +2003,7 @@ i915_add_request(struct intel_ring_buffer *ring, request->emitted_jiffies = jiffies; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); + request->file_priv = NULL; if (file) { struct drm_i915_file_private *file_priv = file->driver_priv; @@ -1638,13 +2023,13 @@ i915_add_request(struct intel_ring_buffer *ring, jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); } - if (was_empty) + if (was_empty) { queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + intel_mark_busy(dev_priv->dev); + } } - WARN_ON(!list_empty(&ring->gpu_write_list)); - return 0; } @@ -1686,8 +2071,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct drm_i915_gem_object, ring_list); - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_inactive(obj); } } @@ -1723,20 +2106,6 @@ void i915_gem_reset(struct drm_device *dev) for_each_ring(ring, dev_priv, i) i915_gem_reset_ring_lists(dev_priv, ring); - /* Remove anything from the flushing lists. The GPU cache is likely - * to be lost on reset along with the data, so simply move the - * lost bo to the inactive list. - */ - while (!list_empty(&dev_priv->mm.flushing_list)) { - obj = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - mm_list); - - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); - i915_gem_object_move_to_inactive(obj); - } - /* Move everything out of the GPU domains to ensure we do any * necessary invalidation upon reuse. */ @@ -1765,7 +2134,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) WARN_ON(i915_verify_lists(ring->dev)); - seqno = ring->get_seqno(ring); + seqno = ring->get_seqno(ring, true); for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) if (seqno >= ring->sync_seqno[i]) @@ -1804,13 +2173,10 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) struct drm_i915_gem_object, ring_list); - if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) + if (!i915_seqno_passed(seqno, obj->last_read_seqno)) break; - if (obj->base.write_domain != 0) - i915_gem_object_move_to_flushing(obj); - else - i915_gem_object_move_to_inactive(obj); + i915_gem_object_move_to_inactive(obj); } if (unlikely(ring->trace_irq_seqno && @@ -1859,216 +2225,20 @@ i915_gem_retire_work_handler(struct work_struct *work) */ idle = true; for_each_ring(ring, dev_priv, i) { - if (ring->gpu_caches_dirty) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL || - i915_add_request(ring, NULL, request)) - kfree(request); - } + if (ring->gpu_caches_dirty) + i915_add_request(ring, NULL, NULL); idle &= list_empty(&ring->request_list); } if (!dev_priv->mm.suspended && !idle) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + if (idle) + intel_mark_idle(dev); mutex_unlock(&dev->struct_mutex); } -int -i915_gem_check_wedge(struct drm_i915_private *dev_priv, - bool interruptible) -{ - if (atomic_read(&dev_priv->mm.wedged)) { - struct completion *x = &dev_priv->error_completion; - bool recovery_complete; - unsigned long flags; - - /* Give the error handler a chance to run. */ - spin_lock_irqsave(&x->wait.lock, flags); - recovery_complete = x->done > 0; - spin_unlock_irqrestore(&x->wait.lock, flags); - - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - /* Recovery complete, but still wedged means reset failure. */ - if (recovery_complete) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -/* - * Compare seqno against outstanding lazy request. Emit a request if they are - * equal. - */ -static int -i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) -{ - int ret = 0; - - BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); - - if (seqno == ring->outstanding_lazy_request) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - ret = i915_add_request(ring, NULL, request); - if (ret) { - kfree(request); - return ret; - } - - BUG_ON(seqno != request->seqno); - } - - return ret; -} - -/** - * __wait_seqno - wait until execution of seqno has finished - * @ring: the ring expected to report seqno - * @seqno: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * - * Returns 0 if the seqno was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. - */ -static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, - bool interruptible, struct timespec *timeout) -{ - drm_i915_private_t *dev_priv = ring->dev->dev_private; - struct timespec before, now, wait_time={1,0}; - unsigned long timeout_jiffies; - long end; - bool wait_forever = true; - int ret; - - if (i915_seqno_passed(ring->get_seqno(ring), seqno)) - return 0; - - trace_i915_gem_request_wait_begin(ring, seqno); - - if (timeout != NULL) { - wait_time = *timeout; - wait_forever = false; - } - - timeout_jiffies = timespec_to_jiffies(&wait_time); - - if (WARN_ON(!ring->irq_get(ring))) - return -ENODEV; - - /* Record current time in case interrupted by signal, or wedged * */ - getrawmonotonic(&before); - -#define EXIT_COND \ - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ - atomic_read(&dev_priv->mm.wedged)) - do { - if (interruptible) - end = wait_event_interruptible_timeout(ring->irq_queue, - EXIT_COND, - timeout_jiffies); - else - end = wait_event_timeout(ring->irq_queue, EXIT_COND, - timeout_jiffies); - - ret = i915_gem_check_wedge(dev_priv, interruptible); - if (ret) - end = ret; - } while (end == 0 && wait_forever); - - getrawmonotonic(&now); - - ring->irq_put(ring); - trace_i915_gem_request_wait_end(ring, seqno); -#undef EXIT_COND - - if (timeout) { - struct timespec sleep_time = timespec_sub(now, before); - *timeout = timespec_sub(*timeout, sleep_time); - } - - switch (end) { - case -EIO: - case -EAGAIN: /* Wedged */ - case -ERESTARTSYS: /* Signal */ - return (int)end; - case 0: /* Timeout */ - if (timeout) - set_normalized_timespec(timeout, 0, 0); - return -ETIME; - default: /* Completed */ - WARN_ON(end < 0); /* We're not aware of other errors */ - return 0; - } -} - -/** - * Waits for a sequence number to be signaled, and cleans up the - * request and object lists appropriately for that event. - */ -int -i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) -{ - drm_i915_private_t *dev_priv = ring->dev->dev_private; - int ret = 0; - - BUG_ON(seqno == 0); - - ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible); - if (ret) - return ret; - - ret = i915_gem_check_olr(ring, seqno); - if (ret) - return ret; - - ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible, NULL); - - return ret; -} - -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) -{ - int ret; - - /* This function only exists to support waiting for existing rendering, - * not for emitting required flushes. - */ - BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); - - /* If there is rendering queued on the buffer being evicted, wait for - * it. - */ - if (obj->active) { - ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); - if (ret) - return ret; - i915_gem_retire_requests_ring(obj->ring); - } - - return 0; -} - /** * Ensures that an object will eventually get non-busy by flushing any required * write domains, emitting any outstanding lazy request and retiring and @@ -2080,14 +2250,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) int ret; if (obj->active) { - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); if (ret) return ret; - ret = i915_gem_check_olr(obj->ring, - obj->last_rendering_seqno); - if (ret) - return ret; i915_gem_retire_requests_ring(obj->ring); } @@ -2147,7 +2313,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto out; if (obj->active) { - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; ring = obj->ring; } @@ -2202,11 +2368,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) - return i915_gem_object_wait_rendering(obj); + return i915_gem_object_wait_rendering(obj, false); idx = intel_ring_sync_index(from, to); - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; if (seqno <= from->sync_seqno[idx]) return 0; @@ -2260,6 +2426,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (obj->pin_count) return -EBUSY; + BUG_ON(obj->pages == NULL); + ret = i915_gem_object_finish_gpu(obj); if (ret) return ret; @@ -2270,22 +2438,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) i915_gem_object_finish_gtt(obj); - /* Move the object to the CPU domain to ensure that - * any possible CPU writes while it's not in the GTT - * are flushed when we go to remap it. - */ - if (ret == 0) - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret == -ERESTARTSYS) - return ret; - if (ret) { - /* In the event of a disaster, abandon all caches and - * hope for the best. - */ - i915_gem_clflush_object(obj); - obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } - /* release the fence reg _after_ flushing */ ret = i915_gem_object_put_fence(obj); if (ret) @@ -2301,10 +2453,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) } i915_gem_gtt_finish_object(obj); - i915_gem_object_put_pages_gtt(obj); - - list_del_init(&obj->gtt_list); - list_del_init(&obj->mm_list); + list_del(&obj->mm_list); + list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); /* Avoid an unnecessary call to unbind on rebind. */ obj->map_and_fenceable = true; @@ -2312,48 +2462,14 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) obj->gtt_space = NULL; obj->gtt_offset = 0; - if (i915_gem_object_is_purgeable(obj)) - i915_gem_object_truncate(obj); - - return ret; -} - -int -i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains) -{ - int ret; - - if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); - - ret = ring->flush(ring, invalidate_domains, flush_domains); - if (ret) - return ret; - - if (flush_domains & I915_GEM_GPU_DOMAINS) - i915_gem_process_flushing_list(ring, flush_domains); - return 0; } static int i915_ring_idle(struct intel_ring_buffer *ring) { - int ret; - - if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) + if (list_empty(&ring->active_list)) return 0; - if (!list_empty(&ring->gpu_write_list)) { - ret = i915_gem_flush_ring(ring, - I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - } - return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring)); } @@ -2372,10 +2488,6 @@ int i915_gpu_idle(struct drm_device *dev) ret = i915_ring_idle(ring); if (ret) return ret; - - /* Is the device fubar? */ - if (WARN_ON(!list_empty(&ring->gpu_write_list))) - return -EBUSY; } return 0; @@ -2548,21 +2660,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, static int i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) { - int ret; - - if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - if (ret) - return ret; - } - - obj->fenced_gpu_access = false; - } - if (obj->last_fenced_seqno) { - ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); + int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); if (ret) return ret; @@ -2575,6 +2674,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) mb(); + obj->fenced_gpu_access = false; return 0; } @@ -2694,18 +2794,88 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } +static bool i915_gem_valid_gtt_space(struct drm_device *dev, + struct drm_mm_node *gtt_space, + unsigned long cache_level) +{ + struct drm_mm_node *other; + + /* On non-LLC machines we have to be careful when putting differing + * types of snoopable memory together to avoid the prefetcher + * crossing memory domains and dieing. + */ + if (HAS_LLC(dev)) + return true; + + if (gtt_space == NULL) + return true; + + if (list_empty(>t_space->node_list)) + return true; + + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); + if (other->allocated && !other->hole_follows && other->color != cache_level) + return false; + + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) + return false; + + return true; +} + +static void i915_gem_verify_gtt(struct drm_device *dev) +{ +#if WATCH_GTT + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj; + int err = 0; + + list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { + if (obj->gtt_space == NULL) { + printk(KERN_ERR "object found on GTT list with no space reserved\n"); + err++; + continue; + } + + if (obj->cache_level != obj->gtt_space->color) { + printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", + obj->gtt_space->start, + obj->gtt_space->start + obj->gtt_space->size, + obj->cache_level, + obj->gtt_space->color); + err++; + continue; + } + + if (!i915_gem_valid_gtt_space(dev, + obj->gtt_space, + obj->cache_level)) { + printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", + obj->gtt_space->start, + obj->gtt_space->start + obj->gtt_space->size, + obj->cache_level); + err++; + continue; + } + } + + WARN_ON(err); +#endif +} + /** * Finds free space in the GTT aperture and binds the object there. */ static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, unsigned alignment, - bool map_and_fenceable) + bool map_and_fenceable, + bool nonblocking) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_mm_node *free_space; - gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; int ret; @@ -2745,89 +2915,67 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, return -E2BIG; } + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + search_free: if (map_and_fenceable) free_space = - drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, - size, alignment, - 0, dev_priv->mm.gtt_mappable_end, - 0); + drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, + size, alignment, obj->cache_level, + 0, dev_priv->mm.gtt_mappable_end, + false); else - free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, - size, alignment, 0); + free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, + size, alignment, obj->cache_level, + false); if (free_space != NULL) { if (map_and_fenceable) obj->gtt_space = drm_mm_get_block_range_generic(free_space, - size, alignment, 0, + size, alignment, obj->cache_level, 0, dev_priv->mm.gtt_mappable_end, - 0); + false); else obj->gtt_space = - drm_mm_get_block(free_space, size, alignment); + drm_mm_get_block_generic(free_space, + size, alignment, obj->cache_level, + false); } if (obj->gtt_space == NULL) { - /* If the gtt is empty and we're still having trouble - * fitting our object in, we're out of memory. - */ ret = i915_gem_evict_something(dev, size, alignment, - map_and_fenceable); + obj->cache_level, + map_and_fenceable, + nonblocking); if (ret) return ret; goto search_free; } - - ret = i915_gem_object_get_pages_gtt(obj, gfpmask); - if (ret) { + if (WARN_ON(!i915_gem_valid_gtt_space(dev, + obj->gtt_space, + obj->cache_level))) { drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; - - if (ret == -ENOMEM) { - /* first try to reclaim some memory by clearing the GTT */ - ret = i915_gem_evict_everything(dev, false); - if (ret) { - /* now try to shrink everyone else */ - if (gfpmask) { - gfpmask = 0; - goto search_free; - } - - return -ENOMEM; - } - - goto search_free; - } - - return ret; + return -EINVAL; } + ret = i915_gem_gtt_prepare_object(obj); if (ret) { - i915_gem_object_put_pages_gtt(obj); drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; - - if (i915_gem_evict_everything(dev, false)) - return ret; - - goto search_free; + return ret; } if (!dev_priv->mm.aliasing_ppgtt) i915_gem_gtt_bind_object(obj, obj->cache_level); - list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); + list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - /* Assert that the object is not currently in any GPU domain. As it - * wasn't in the GTT, there shouldn't be any way it could have been in - * a GPU cache - */ - BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); - BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); - obj->gtt_offset = obj->gtt_space->start; fenceable = @@ -2840,6 +2988,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->map_and_fenceable = mappable && fenceable; trace_i915_gem_object_bind(obj, map_and_fenceable); + i915_gem_verify_gtt(dev); return 0; } @@ -2866,18 +3015,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) trace_i915_gem_object_clflush(obj); - drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); -} - -/** Flushes any GPU write domain for the object if it's dirty. */ -static int -i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) -{ - if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - /* Queue the GPU write cache flushing we need. */ - return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); + drm_clflush_sg(obj->pages); } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -2946,16 +3084,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; - if (obj->pending_gpu_write || write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } - i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -2998,6 +3130,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } + if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { + ret = i915_gem_object_unbind(obj); + if (ret) + return ret; + } + if (obj->gtt_space) { ret = i915_gem_object_finish_gpu(obj); if (ret) @@ -3009,7 +3147,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * registers with snooped memory, so relinquish any fences * currently pointing to our region in the aperture. */ - if (INTEL_INFO(obj->base.dev)->gen < 6) { + if (INTEL_INFO(dev)->gen < 6) { ret = i915_gem_object_put_fence(obj); if (ret) return ret; @@ -3020,6 +3158,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (obj->has_aliasing_ppgtt_mapping) i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, obj, cache_level); + + obj->gtt_space->color = cache_level; } if (cache_level == I915_CACHE_NONE) { @@ -3046,9 +3186,72 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } obj->cache_level = cache_level; + i915_gem_verify_gtt(dev); return 0; } +int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_caching *args = data; + struct drm_i915_gem_object *obj; + int ret; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); + if (&obj->base == NULL) { + ret = -ENOENT; + goto unlock; + } + + args->caching = obj->cache_level != I915_CACHE_NONE; + + drm_gem_object_unreference(&obj->base); +unlock: + mutex_unlock(&dev->struct_mutex); + return ret; +} + +int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_caching *args = data; + struct drm_i915_gem_object *obj; + enum i915_cache_level level; + int ret; + + switch (args->caching) { + case I915_CACHING_NONE: + level = I915_CACHE_NONE; + break; + case I915_CACHING_CACHED: + level = I915_CACHE_LLC; + break; + default: + return -EINVAL; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); + if (&obj->base == NULL) { + ret = -ENOENT; + goto unlock; + } + + ret = i915_gem_object_set_cache_level(obj, level); + + drm_gem_object_unreference(&obj->base); +unlock: + mutex_unlock(&dev->struct_mutex); + return ret; +} + /* * Prepare buffer for display plane (scanout, cursors, etc). * Can be called from an uninterruptible phase (modesetting) and allows @@ -3062,10 +3265,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_flush_gpu_write_domain(obj); - if (ret) - return ret; - if (pipelined != obj->ring) { ret = i915_gem_object_sync(obj, pipelined); if (ret) @@ -3089,7 +3288,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ - ret = i915_gem_object_pin(obj, alignment, true); + ret = i915_gem_object_pin(obj, alignment, true, false); if (ret) return ret; @@ -3101,7 +3300,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); + obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; trace_i915_gem_object_change_domain(obj, @@ -3119,13 +3318,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); - if (ret) - return ret; - } - - ret = i915_gem_object_wait_rendering(obj); + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; @@ -3149,16 +3342,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; - if (write || obj->pending_gpu_write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } - i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3238,7 +3425,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) int i915_gem_object_pin(struct drm_i915_gem_object *obj, uint32_t alignment, - bool map_and_fenceable) + bool map_and_fenceable, + bool nonblocking) { int ret; @@ -3263,7 +3451,8 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, if (obj->gtt_space == NULL) { ret = i915_gem_object_bind_to_gtt(obj, alignment, - map_and_fenceable); + map_and_fenceable, + nonblocking); if (ret) return ret; } @@ -3321,7 +3510,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; if (obj->user_pin_count == 1) { - ret = i915_gem_object_pin(obj, args->alignment, true); + ret = i915_gem_object_pin(obj, args->alignment, true, false); if (ret) goto out; } @@ -3401,6 +3590,10 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_flush_active(obj); args->busy = obj->active; + if (obj->ring) { + BUILD_BUG_ON(I915_NUM_RINGS > 16); + args->busy |= intel_ring_flag(obj->ring) << 16; + } drm_gem_object_unreference(&obj->base); unlock: @@ -3449,9 +3642,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (obj->madv != __I915_MADV_PURGED) obj->madv = args->madv; - /* if the object is no longer bound, discard its backing storage */ - if (i915_gem_object_is_purgeable(obj) && - obj->gtt_space == NULL) + /* if the object is no longer attached, discard its backing storage */ + if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) i915_gem_object_truncate(obj); args->retained = obj->madv != __I915_MADV_PURGED; @@ -3463,10 +3655,32 @@ unlock: return ret; } +void i915_gem_object_init(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_object_ops *ops) +{ + INIT_LIST_HEAD(&obj->mm_list); + INIT_LIST_HEAD(&obj->gtt_list); + INIT_LIST_HEAD(&obj->ring_list); + INIT_LIST_HEAD(&obj->exec_list); + + obj->ops = ops; + + obj->fence_reg = I915_FENCE_REG_NONE; + obj->madv = I915_MADV_WILLNEED; + /* Avoid an unnecessary call to unbind on the first bind. */ + obj->map_and_fenceable = true; + + i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); +} + +static const struct drm_i915_gem_object_ops i915_gem_object_ops = { + .get_pages = i915_gem_object_get_pages_gtt, + .put_pages = i915_gem_object_put_pages_gtt, +}; + struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct address_space *mapping; u32 mask; @@ -3490,7 +3704,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; mapping_set_gfp_mask(mapping, mask); - i915_gem_info_add_obj(dev_priv, size); + i915_gem_object_init(obj, &i915_gem_object_ops); obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -3512,17 +3726,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, } else obj->cache_level = I915_CACHE_NONE; - obj->base.driver_private = NULL; - obj->fence_reg = I915_FENCE_REG_NONE; - INIT_LIST_HEAD(&obj->mm_list); - INIT_LIST_HEAD(&obj->gtt_list); - INIT_LIST_HEAD(&obj->ring_list); - INIT_LIST_HEAD(&obj->exec_list); - INIT_LIST_HEAD(&obj->gpu_write_list); - obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. */ - obj->map_and_fenceable = true; - return obj; } @@ -3541,9 +3744,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) trace_i915_gem_object_destroy(obj); - if (gem_obj->import_attach) - drm_prime_gem_destroy(gem_obj, obj->sg_table); - if (obj->phys_obj) i915_gem_detach_phys_object(dev, obj); @@ -3559,8 +3759,14 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) dev_priv->mm.interruptible = was_interruptible; } - if (obj->base.map_list.map) - drm_gem_free_mmap_offset(&obj->base); + obj->pages_pin_count = 0; + i915_gem_object_put_pages(obj); + i915_gem_object_free_mmap_offset(obj); + + BUG_ON(obj->pages); + + if (obj->base.import_attach) + drm_prime_gem_destroy(&obj->base, NULL); drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev_priv, obj->base.size); @@ -3591,7 +3797,7 @@ i915_gem_idle(struct drm_device *dev) /* Under UMS, be paranoid and evict. */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) - i915_gem_evict_everything(dev, false); + i915_gem_evict_everything(dev); i915_gem_reset_fences(dev); @@ -3892,7 +4098,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, } BUG_ON(!list_empty(&dev_priv->mm.active_list)); - BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); mutex_unlock(&dev->struct_mutex); @@ -3940,7 +4145,6 @@ init_ring_lists(struct intel_ring_buffer *ring) { INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); } void @@ -3950,10 +4154,10 @@ i915_gem_load(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; INIT_LIST_HEAD(&dev_priv->mm.active_list); - INIT_LIST_HEAD(&dev_priv->mm.flushing_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); + INIT_LIST_HEAD(&dev_priv->mm.unbound_list); + INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); - INIT_LIST_HEAD(&dev_priv->mm.gtt_list); for (i = 0; i < I915_NUM_RINGS; i++) init_ring_lists(&dev_priv->ring[i]); for (i = 0; i < I915_MAX_NUM_FENCES; i++) @@ -4198,18 +4402,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) } static int -i915_gpu_is_active(struct drm_device *dev) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int lists_empty; - - lists_empty = list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list); - - return !lists_empty; -} - -static int i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = @@ -4217,60 +4409,27 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private, mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; - struct drm_i915_gem_object *obj, *next; + struct drm_i915_gem_object *obj; int nr_to_scan = sc->nr_to_scan; int cnt; if (!mutex_trylock(&dev->struct_mutex)) return 0; - /* "fast-path" to count number of available objects */ - if (nr_to_scan == 0) { - cnt = 0; - list_for_each_entry(obj, - &dev_priv->mm.inactive_list, - mm_list) - cnt++; - mutex_unlock(&dev->struct_mutex); - return cnt / 100 * sysctl_vfs_cache_pressure; - } - -rescan: - /* first scan for clean buffers */ - i915_gem_retire_requests(dev); - - list_for_each_entry_safe(obj, next, - &dev_priv->mm.inactive_list, - mm_list) { - if (i915_gem_object_is_purgeable(obj)) { - if (i915_gem_object_unbind(obj) == 0 && - --nr_to_scan == 0) - break; - } + if (nr_to_scan) { + nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); + if (nr_to_scan > 0) + i915_gem_shrink_all(dev_priv); } - /* second pass, evict/count anything still on the inactive list */ cnt = 0; - list_for_each_entry_safe(obj, next, - &dev_priv->mm.inactive_list, - mm_list) { - if (nr_to_scan && - i915_gem_object_unbind(obj) == 0) - nr_to_scan--; - else - cnt++; - } + list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list) + if (obj->pages_pin_count == 0) + cnt += obj->base.size >> PAGE_SHIFT; + list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) + if (obj->pin_count == 0 && obj->pages_pin_count == 0) + cnt += obj->base.size >> PAGE_SHIFT; - if (nr_to_scan && i915_gpu_is_active(dev)) { - /* - * We are desperate for pages, so as a last resort, wait - * for the GPU to finish and discard whatever we can. - * This has a dramatic impact to reduce the number of - * OOM-killer events whilst running the GPU aggressively. - */ - if (i915_gpu_idle(dev) == 0) - goto rescan; - } mutex_unlock(&dev->struct_mutex); - return cnt / 100 * sysctl_vfs_cache_pressure; + return cnt; } |