diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 375 |
1 files changed, 137 insertions, 238 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 83599f2a0387..4303b447efe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,9 +47,30 @@ * that the the relevant GPU caches have been flushed. */ +struct amdgpu_fence { + struct fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; +}; + static struct kmem_cache *amdgpu_fence_slab; static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); +/* + * Cast helper + */ +static const struct fence_ops amdgpu_fence_ops; +static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f) +{ + struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); + + if (__f->base.ops == &amdgpu_fence_ops) + return __f; + + return NULL; +} + /** * amdgpu_fence_write - write a fence value * @@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) if (drv->cpu_addr) seq = le32_to_cpu(*drv->cpu_addr); else - seq = lower_32_bits(atomic64_read(&drv->last_seq)); + seq = atomic_read(&drv->last_seq); return seq; } @@ -100,20 +121,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; + struct fence **ptr; + uint32_t seq; fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if (fence == NULL) return -ENOMEM; - fence->seq = ++ring->fence_drv.sync_seq; + seq = ++ring->fence_drv.sync_seq; fence->ring = ring; fence_init(&fence->base, &amdgpu_fence_ops, - &ring->fence_drv.fence_queue.lock, + &ring->fence_drv.lock, adev->fence_context + ring->idx, - fence->seq); + seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - fence->seq, AMDGPU_FENCE_FLAG_INT); + seq, AMDGPU_FENCE_FLAG_INT); + + ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + /* This function can't be called concurrently anyway, otherwise + * emitting the fence would mess up the hardware ring buffer. + */ + BUG_ON(rcu_dereference_protected(*ptr, 1)); + + rcu_assign_pointer(*ptr, fence_get(&fence->base)); + *f = &fence->base; + return 0; } @@ -131,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) } /** - * amdgpu_fence_activity - check for fence activity + * amdgpu_fence_process - check for fence activity * * @ring: pointer to struct amdgpu_ring * * Checks the current fence value and calculates the last - * signalled fence value. Returns true if activity occured - * on the ring, and the fence_queue should be waken up. + * signalled fence value. Wakes the fence queue if the + * sequence number has increased. */ -static bool amdgpu_fence_activity(struct amdgpu_ring *ring) +void amdgpu_fence_process(struct amdgpu_ring *ring) { - uint64_t seq, last_seq, last_emitted; - unsigned count_loop = 0; - bool wake = false; - - /* Note there is a scenario here for an infinite loop but it's - * very unlikely to happen. For it to happen, the current polling - * process need to be interrupted by another process and another - * process needs to update the last_seq btw the atomic read and - * xchg of the current process. - * - * More over for this to go in infinite loop there need to be - * continuously new fence signaled ie amdgpu_fence_read needs - * to return a different value each time for both the currently - * polling process and the other process that xchg the last_seq - * btw atomic read and xchg of the current process. And the - * value the other process set as last seq must be higher than - * the seq value we just read. Which means that current process - * need to be interrupted after amdgpu_fence_read and before - * atomic xchg. - * - * To be even more safe we count the number of time we loop and - * we bail after 10 loop just accepting the fact that we might - * have temporarly set the last_seq not to the true real last - * seq but to an older one. - */ - last_seq = atomic64_read(&ring->fence_drv.last_seq); + struct amdgpu_fence_driver *drv = &ring->fence_drv; + uint32_t seq, last_seq; + int r; + do { - last_emitted = ring->fence_drv.sync_seq; + last_seq = atomic_read(&ring->fence_drv.last_seq); seq = amdgpu_fence_read(ring); - seq |= last_seq & 0xffffffff00000000LL; - if (seq < last_seq) { - seq &= 0xffffffff; - seq |= last_emitted & 0xffffffff00000000LL; - } - if (seq <= last_seq || seq > last_emitted) { - break; - } - /* If we loop over we don't want to return without - * checking if a fence is signaled as it means that the - * seq we just read is different from the previous on. - */ - wake = true; - last_seq = seq; - if ((count_loop++) > 10) { - /* We looped over too many time leave with the - * fact that we might have set an older fence - * seq then the current real last seq as signaled - * by the hw. - */ - break; - } - } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); + } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); - if (seq < last_emitted) + if (seq != ring->fence_drv.sync_seq) amdgpu_fence_schedule_fallback(ring); - return wake; -} + while (last_seq != seq) { + struct fence *fence, **ptr; -/** - * amdgpu_fence_process - process a fence - * - * @adev: amdgpu_device pointer - * @ring: ring index the fence is associated with - * - * Checks the current fence value and wakes the fence queue - * if the sequence number has increased (all asics). - */ -void amdgpu_fence_process(struct amdgpu_ring *ring) -{ - if (amdgpu_fence_activity(ring)) - wake_up_all(&ring->fence_drv.fence_queue); + ptr = &drv->fences[++last_seq & drv->num_fences_mask]; + + /* There is always exactly one thread signaling this fence slot */ + fence = rcu_dereference_protected(*ptr, 1); + rcu_assign_pointer(*ptr, NULL); + + BUG_ON(!fence); + + r = fence_signal(fence); + if (!r) + FENCE_TRACE(fence, "signaled from irq context\n"); + else + BUG(); + + fence_put(fence); + } } /** @@ -231,77 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg) } /** - * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled - * - * @ring: ring the fence is associated with - * @seq: sequence number - * - * Check if the last signaled fence sequnce number is >= the requested - * sequence number (all asics). - * Returns true if the fence has signaled (current fence value - * is >= requested value) or false if it has not (current fence - * value is < the requested value. Helper function for - * amdgpu_fence_signaled(). - */ -static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq) -{ - if (atomic64_read(&ring->fence_drv.last_seq) >= seq) - return true; - - /* poll new last sequence at least once */ - amdgpu_fence_process(ring); - if (atomic64_read(&ring->fence_drv.last_seq) >= seq) - return true; - - return false; -} - -/* - * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal - * @ring: ring to wait on for the seq number - * @seq: seq number wait for - * - * return value: - * 0: seq signaled, and gpu not hang - * -EINVAL: some paramter is not valid - */ -static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) -{ - BUG_ON(!ring); - if (seq > ring->fence_drv.sync_seq) - return -EINVAL; - - if (atomic64_read(&ring->fence_drv.last_seq) >= seq) - return 0; - - amdgpu_fence_schedule_fallback(ring); - wait_event(ring->fence_drv.fence_queue, - amdgpu_fence_seq_signaled(ring, seq)); - - return 0; -} - -/** - * amdgpu_fence_wait_next - wait for the next fence to signal - * - * @adev: amdgpu device pointer - * @ring: ring index the fence is associated with - * - * Wait for the next fence on the requested ring to signal (all asics). - * Returns 0 if the next fence has passed, error for all other cases. - * Caller must hold ring lock. - */ -int amdgpu_fence_wait_next(struct amdgpu_ring *ring) -{ - uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL; - - if (seq >= ring->fence_drv.sync_seq) - return -ENOENT; - - return amdgpu_fence_ring_wait_seq(ring, seq); -} - -/** * amdgpu_fence_wait_empty - wait for all fences to signal * * @adev: amdgpu device pointer @@ -309,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring) * * Wait for all fences on the requested ring to signal (all asics). * Returns 0 if the fences have passed, error for all other cases. - * Caller must hold ring lock. */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { - uint64_t seq = ring->fence_drv.sync_seq; + uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq); + struct fence *fence, **ptr; + int r; if (!seq) return 0; - return amdgpu_fence_ring_wait_seq(ring, seq); + ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + rcu_read_lock(); + fence = rcu_dereference(*ptr); + if (!fence || !fence_get_rcu(fence)) { + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + + r = fence_wait(fence, false); + fence_put(fence); + return r; } /** @@ -338,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) * but it's ok to report slightly wrong fence count here. */ amdgpu_fence_process(ring); - emitted = ring->fence_drv.sync_seq - - atomic64_read(&ring->fence_drv.last_seq); - /* to avoid 32bits warp around */ - if (emitted > 0x10000000) - emitted = 0x10000000; - - return (unsigned)emitted; + emitted = 0x100000000ull; + emitted -= atomic_read(&ring->fence_drv.last_seq); + emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); + return lower_32_bits(emitted); } /** @@ -376,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; } - amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq)); + amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); amdgpu_irq_get(adev, irq_src, irq_type); ring->fence_drv.irq_src = irq_src; @@ -394,25 +324,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * for the requested ring. * * @ring: ring to init the fence driver on + * @num_hw_submission: number of entries on the hardware queue * * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). */ -int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) +int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, + unsigned num_hw_submission) { long timeout; int r; + /* Check that num_hw_submission is a power of two */ + if ((num_hw_submission & (num_hw_submission - 1)) != 0) + return -EINVAL; + ring->fence_drv.cpu_addr = NULL; ring->fence_drv.gpu_addr = 0; ring->fence_drv.sync_seq = 0; - atomic64_set(&ring->fence_drv.last_seq, 0); + atomic_set(&ring->fence_drv.last_seq, 0); ring->fence_drv.initialized = false; setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, (unsigned long)ring); - init_waitqueue_head(&ring->fence_drv.fence_queue); + ring->fence_drv.num_fences_mask = num_hw_submission - 1; + spin_lock_init(&ring->fence_drv.lock); + ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *), + GFP_KERNEL); + if (!ring->fence_drv.fences) + return -ENOMEM; timeout = msecs_to_jiffies(amdgpu_lockup_timeout); if (timeout == 0) { @@ -426,7 +367,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) timeout = MAX_SCHEDULE_TIMEOUT; } r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, - amdgpu_sched_hw_submission, + num_hw_submission, timeout, ring->name); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", @@ -474,10 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev) */ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) { - int i, r; + unsigned i, j; + int r; - if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) - kmem_cache_destroy(amdgpu_fence_slab); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -488,13 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) /* no need to trigger GPU reset as we are unloading */ amdgpu_fence_driver_force_completion(adev); } - wake_up_all(&ring->fence_drv.fence_queue); amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); amd_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); + for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) + fence_put(ring->fence_drv.fences[i]); + kfree(ring->fence_drv.fences); ring->fence_drv.initialized = false; } + + if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) + kmem_cache_destroy(amdgpu_fence_slab); } /** @@ -591,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f) } /** - * amdgpu_fence_is_signaled - test if fence is signaled - * - * @f: fence to test + * amdgpu_fence_enable_signaling - enable signalling on fence + * @fence: fence * - * Test the fence sequence number if it is already signaled. If it isn't - * signaled start fence processing. Returns True if the fence is signaled. + * This function is called with fence_queue lock held, and adds a callback + * to fence_queue that checks if this fence is signaled, and if so it + * signals the fence and removes itself. */ -static bool amdgpu_fence_is_signaled(struct fence *f) +static bool amdgpu_fence_enable_signaling(struct fence *f) { struct amdgpu_fence *fence = to_amdgpu_fence(f); struct amdgpu_ring *ring = fence->ring; - if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) - return true; - - amdgpu_fence_process(ring); + if (!timer_pending(&ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(ring); - if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) - return true; + FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); - return false; + return true; } /** - * amdgpu_fence_check_signaled - callback from fence_queue + * amdgpu_fence_free - free up the fence memory + * + * @rcu: RCU callback head * - * this function is called with fence_queue lock held, which is also used - * for the fence locking itself, so unlocked variants are used for - * fence_signal, and remove_wait_queue. + * Free up the fence memory after the RCU grace period. */ -static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) +static void amdgpu_fence_free(struct rcu_head *rcu) { - struct amdgpu_fence *fence; - struct amdgpu_device *adev; - u64 seq; - int ret; - - fence = container_of(wait, struct amdgpu_fence, fence_wake); - adev = fence->ring->adev; - - /* - * We cannot use amdgpu_fence_process here because we're already - * in the waitqueue, in a call from wake_up_all. - */ - seq = atomic64_read(&fence->ring->fence_drv.last_seq); - if (seq >= fence->seq) { - ret = fence_signal_locked(&fence->base); - if (!ret) - FENCE_TRACE(&fence->base, "signaled from irq context\n"); - else - FENCE_TRACE(&fence->base, "was already signaled\n"); - - __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake); - fence_put(&fence->base); - } else - FENCE_TRACE(&fence->base, "pending\n"); - return 0; + struct fence *f = container_of(rcu, struct fence, rcu); + struct amdgpu_fence *fence = to_amdgpu_fence(f); + kmem_cache_free(amdgpu_fence_slab, fence); } /** - * amdgpu_fence_enable_signaling - enable signalling on fence + * amdgpu_fence_release - callback that fence can be freed + * * @fence: fence * - * This function is called with fence_queue lock held, and adds a callback - * to fence_queue that checks if this fence is signaled, and if so it - * signals the fence and removes itself. + * This function is called when the reference count becomes zero. + * It just RCU schedules freeing up the fence. */ -static bool amdgpu_fence_enable_signaling(struct fence *f) -{ - struct amdgpu_fence *fence = to_amdgpu_fence(f); - struct amdgpu_ring *ring = fence->ring; - - if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) - return false; - - fence->fence_wake.flags = 0; - fence->fence_wake.private = NULL; - fence->fence_wake.func = amdgpu_fence_check_signaled; - __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake); - fence_get(f); - if (!timer_pending(&ring->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(ring); - FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); - return true; -} - static void amdgpu_fence_release(struct fence *f) { - struct amdgpu_fence *fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); + call_rcu(&f->rcu, amdgpu_fence_free); } -const struct fence_ops amdgpu_fence_ops = { +static const struct fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, - .signaled = amdgpu_fence_is_signaled, .wait = fence_default_wait, .release = amdgpu_fence_release, }; @@ -711,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); - seq_printf(m, "Last signaled fence 0x%016llx\n", - (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", + seq_printf(m, "Last signaled fence 0x%08x\n", + atomic_read(&ring->fence_drv.last_seq)); + seq_printf(m, "Last emitted 0x%08x\n", ring->fence_drv.sync_seq); } return 0; |