summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c375
1 files changed, 137 insertions, 238 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 83599f2a0387..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
* that the the relevant GPU caches have been flushed.
*/
+struct amdgpu_fence {
+ struct fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+};
+
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ if (__f->base.ops == &amdgpu_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
if (drv->cpu_addr)
seq = le32_to_cpu(*drv->cpu_addr);
else
- seq = lower_32_bits(atomic64_read(&drv->last_seq));
+ seq = atomic_read(&drv->last_seq);
return seq;
}
@@ -100,20 +121,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_fence *fence;
+ struct fence **ptr;
+ uint32_t seq;
fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
if (fence == NULL)
return -ENOMEM;
- fence->seq = ++ring->fence_drv.sync_seq;
+ seq = ++ring->fence_drv.sync_seq;
fence->ring = ring;
fence_init(&fence->base, &amdgpu_fence_ops,
- &ring->fence_drv.fence_queue.lock,
+ &ring->fence_drv.lock,
adev->fence_context + ring->idx,
- fence->seq);
+ seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- fence->seq, AMDGPU_FENCE_FLAG_INT);
+ seq, AMDGPU_FENCE_FLAG_INT);
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+ rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
*f = &fence->base;
+
return 0;
}
@@ -131,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
*
* @ring: pointer to struct amdgpu_ring
*
* Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
*/
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- uint64_t seq, last_seq, last_emitted;
- unsigned count_loop = 0;
- bool wake = false;
-
- /* Note there is a scenario here for an infinite loop but it's
- * very unlikely to happen. For it to happen, the current polling
- * process need to be interrupted by another process and another
- * process needs to update the last_seq btw the atomic read and
- * xchg of the current process.
- *
- * More over for this to go in infinite loop there need to be
- * continuously new fence signaled ie amdgpu_fence_read needs
- * to return a different value each time for both the currently
- * polling process and the other process that xchg the last_seq
- * btw atomic read and xchg of the current process. And the
- * value the other process set as last seq must be higher than
- * the seq value we just read. Which means that current process
- * need to be interrupted after amdgpu_fence_read and before
- * atomic xchg.
- *
- * To be even more safe we count the number of time we loop and
- * we bail after 10 loop just accepting the fact that we might
- * have temporarly set the last_seq not to the true real last
- * seq but to an older one.
- */
- last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t seq, last_seq;
+ int r;
+
do {
- last_emitted = ring->fence_drv.sync_seq;
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
seq = amdgpu_fence_read(ring);
- seq |= last_seq & 0xffffffff00000000LL;
- if (seq < last_seq) {
- seq &= 0xffffffff;
- seq |= last_emitted & 0xffffffff00000000LL;
- }
- if (seq <= last_seq || seq > last_emitted) {
- break;
- }
- /* If we loop over we don't want to return without
- * checking if a fence is signaled as it means that the
- * seq we just read is different from the previous on.
- */
- wake = true;
- last_seq = seq;
- if ((count_loop++) > 10) {
- /* We looped over too many time leave with the
- * fact that we might have set an older fence
- * seq then the current real last seq as signaled
- * by the hw.
- */
- break;
- }
- } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (seq < last_emitted)
+ if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
- return wake;
-}
+ while (last_seq != seq) {
+ struct fence *fence, **ptr;
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ rcu_assign_pointer(*ptr, NULL);
+
+ BUG_ON(!fence);
+
+ r = fence_signal(fence);
+ if (!r)
+ FENCE_TRACE(fence, "signaled from irq context\n");
+ else
+ BUG();
+
+ fence_put(fence);
+ }
}
/**
@@ -231,77 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
}
/**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value. Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- /* poll new last sequence at least once */
- amdgpu_fence_process(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- return false;
-}
-
-/*
- * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
- BUG_ON(!ring);
- if (seq > ring->fence_drv.sync_seq)
- return -EINVAL;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return 0;
-
- amdgpu_fence_schedule_fallback(ring);
- wait_event(ring->fence_drv.fence_queue,
- amdgpu_fence_seq_signaled(ring, seq));
-
- return 0;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
- uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
- if (seq >= ring->fence_drv.sync_seq)
- return -ENOENT;
-
- return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
-/**
* amdgpu_fence_wait_empty - wait for all fences to signal
*
* @adev: amdgpu device pointer
@@ -309,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*
* Wait for all fences on the requested ring to signal (all asics).
* Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
- uint64_t seq = ring->fence_drv.sync_seq;
+ uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+ struct fence *fence, **ptr;
+ int r;
if (!seq)
return 0;
- return amdgpu_fence_ring_wait_seq(ring, seq);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = fence_wait(fence, false);
+ fence_put(fence);
+ return r;
}
/**
@@ -338,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
- emitted = ring->fence_drv.sync_seq
- - atomic64_read(&ring->fence_drv.last_seq);
- /* to avoid 32bits warp around */
- if (emitted > 0x10000000)
- emitted = 0x10000000;
-
- return (unsigned)emitted;
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
}
/**
@@ -376,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
}
- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
@@ -394,25 +324,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission)
{
long timeout;
int r;
+ /* Check that num_hw_submission is a power of two */
+ if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+ return -EINVAL;
+
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
ring->fence_drv.sync_seq = 0;
- atomic64_set(&ring->fence_drv.last_seq, 0);
+ atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
(unsigned long)ring);
- init_waitqueue_head(&ring->fence_drv.fence_queue);
+ ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+ GFP_KERNEL);
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
@@ -426,7 +367,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
timeout = MAX_SCHEDULE_TIMEOUT;
}
r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
- amdgpu_sched_hw_submission,
+ num_hw_submission,
timeout, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
@@ -474,10 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
- int i, r;
+ unsigned i, j;
+ int r;
- if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
- kmem_cache_destroy(amdgpu_fence_slab);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -488,13 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(adev);
}
- wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ fence_put(ring->fence_drv.fences[i]);
+ kfree(ring->fence_drv.fences);
ring->fence_drv.initialized = false;
}
+
+ if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+ kmem_cache_destroy(amdgpu_fence_slab);
}
/**
@@ -591,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
}
/**
- * amdgpu_fence_is_signaled - test if fence is signaled
- *
- * @f: fence to test
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
*
- * Test the fence sequence number if it is already signaled. If it isn't
- * signaled start fence processing. Returns True if the fence is signaled.
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
*/
-static bool amdgpu_fence_is_signaled(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
-
- amdgpu_fence_process(ring);
+ if (!timer_pending(&ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
+ FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return false;
+ return true;
}
/**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
*
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * Free up the fence memory after the RCU grace period.
*/
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_free(struct rcu_head *rcu)
{
- struct amdgpu_fence *fence;
- struct amdgpu_device *adev;
- u64 seq;
- int ret;
-
- fence = container_of(wait, struct amdgpu_fence, fence_wake);
- adev = fence->ring->adev;
-
- /*
- * We cannot use amdgpu_fence_process here because we're already
- * in the waitqueue, in a call from wake_up_all.
- */
- seq = atomic64_read(&fence->ring->fence_drv.last_seq);
- if (seq >= fence->seq) {
- ret = fence_signal_locked(&fence->base);
- if (!ret)
- FENCE_TRACE(&fence->base, "signaled from irq context\n");
- else
- FENCE_TRACE(&fence->base, "was already signaled\n");
-
- __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_put(&fence->base);
- } else
- FENCE_TRACE(&fence->base, "pending\n");
- return 0;
+ struct fence *f = container_of(rcu, struct fence, rcu);
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ kmem_cache_free(amdgpu_fence_slab, fence);
}
/**
- * amdgpu_fence_enable_signaling - enable signalling on fence
+ * amdgpu_fence_release - callback that fence can be freed
+ *
* @fence: fence
*
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
*/
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return false;
-
- fence->fence_wake.flags = 0;
- fence->fence_wake.private = NULL;
- fence->fence_wake.func = amdgpu_fence_check_signaled;
- __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_get(f);
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
- FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return true;
-}
-
static void amdgpu_fence_release(struct fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
+ call_rcu(&f->rcu, amdgpu_fence_free);
}
-const struct fence_ops amdgpu_fence_ops = {
+static const struct fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
- .signaled = amdgpu_fence_is_signaled,
.wait = fence_default_wait,
.release = amdgpu_fence_release,
};
@@ -711,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
- seq_printf(m, "Last signaled fence 0x%016llx\n",
- (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
- seq_printf(m, "Last emitted 0x%016llx\n",
+ seq_printf(m, "Last signaled fence 0x%08x\n",
+ atomic_read(&ring->fence_drv.last_seq));
+ seq_printf(m, "Last emitted 0x%08x\n",
ring->fence_drv.sync_seq);
}
return 0;