diff options
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 7 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goyaP.h | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 9 | ||||
-rw-r--r-- | drivers/misc/habanalabs/hw_queue.c | 14 |
4 files changed, 17 insertions, 15 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e8b1142910e0..b39b9c98fe1d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) GOYA_ASYNC_EVENT_ID_PI_UPDATE); } -void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val) +void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) { - /* Not needed in Goya */ + /* The QMANs are on the SRAM so need to copy to IO space */ + memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd)); } static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, @@ -5048,7 +5049,7 @@ static const struct hl_asic_funcs goya_funcs = { .resume = goya_resume, .cb_mmap = goya_cb_mmap, .ring_doorbell = goya_ring_doorbell, - .flush_pq_write = goya_flush_pq_write, + .pqe_write = goya_pqe_write, .asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_free_coherent = goya_dma_free_coherent, .get_int_queue_base = goya_get_int_queue_base, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index f8c611883dc1..d7f48c9c41cd 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev); void goya_late_fini(struct hl_device *hdev); void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi); -void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val); +void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd); void goya_update_eq_ci(struct hl_device *hdev, u32 val); void goya_restore_phase_topology(struct hl_device *hdev); int goya_context_switch(struct hl_device *hdev, u32 asid); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 6a4c64b97f38..ce83adafcf2d 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -441,7 +441,11 @@ enum hl_pll_frequency { * @resume: handles IP specific H/W or SW changes for resume. * @cb_mmap: maps a CB. * @ring_doorbell: increment PI on a given QMAN. - * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed. + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific + * function because the PQs are located in different memory areas + * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of + * writing the PQE must match the destination memory area + * properties. * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling * dma_alloc_coherent(). This is ASIC function because * its implementation is not trivial when the driver @@ -510,7 +514,8 @@ struct hl_asic_funcs { int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u64 kaddress, phys_addr_t paddress, u32 size); void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); - void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val); + void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, + struct hl_bd *bd); void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, gfp_t flag); void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index e3b5517897ea..5f5673b74985 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; struct hl_bd bd; - u64 *pi, *pbd = (u64 *) &bd; + __le64 *pi; bd.ctl = 0; - bd.len = __cpu_to_le32(job->job_cb_size); - bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb); + bd.len = cpu_to_le32(job->job_cb_size); + bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); - pi = (u64 *) (uintptr_t) (q->kernel_address + + pi = (__le64 *) (uintptr_t) (q->kernel_address + ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); - pi[0] = pbd[0]; - pi[1] = pbd[1]; - q->pi++; q->pi &= ((q->int_queue_len << 1) - 1); - /* Flush PQ entry write. Relevant only for specific ASICs */ - hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]); + hdev->asic_funcs->pqe_write(hdev, pi, &bd); hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); } |