summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMukul Joshi <mukul.joshi@amd.com>2025-03-01 05:48:19 +0300
committerAlex Deucher <alexander.deucher@amd.com>2025-12-11 01:38:56 +0300
commit4c5f7d73883355990a4eb1964a02fc5664cfcf22 (patch)
treef86c825e906c02193c873edc2ebc816d0603266c
parent00e08fb2e7ce88e2ae366cbc79997d71d014b0ac (diff)
downloadlinux-4c5f7d73883355990a4eb1964a02fc5664cfcf22.tar.xz
drm/amdgpu: Add interrupt handler for GFX 12.1.0
Add a separate interrupt handler for handling interrupts, both retry and no-retry, for GFX 12.1.0. Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c213
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c7
4 files changed, 224 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 1b1e2ddbc600..dfa518185116 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -639,15 +639,16 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 1, 0):
gmc_v12_1_set_gmc_funcs(adev);
+ gmc_v12_1_set_irq_funcs(adev);
adev->gmc.init_pte_flags = AMDGPU_PTE_IS_PTE;
break;
default:
gmc_v12_0_set_gmc_funcs(adev);
+ gmc_v12_0_set_irq_funcs(adev);
break;
}
gmc_v12_0_set_gfxhub_funcs(adev);
gmc_v12_0_set_mmhub_funcs(adev);
- gmc_v12_0_set_irq_funcs(adev);
gmc_v12_0_set_umc_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
@@ -848,7 +849,8 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- if (!amdgpu_sriov_vf(adev)) {
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) &&
+ !amdgpu_sriov_vf(adev)) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
index b9ae8469d553..55f77520df2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -26,6 +26,208 @@
#include "soc_v1_0_enum.h"
#include "oss/osssys_7_1_0_offset.h"
#include "oss/osssys_7_1_0_sh_mask.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+
+static int gmc_v12_1_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
+ tmp &= ~hub->vm_cntx_cntl_vm_fault;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
+ }
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
+ tmp |= hub->vm_cntx_cntl_vm_fault;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_task_info *task_info;
+ bool retry_fault = false, write_fault = false;
+ unsigned int vmhub, node_id;
+ struct amdgpu_vmhub *hub;
+ uint32_t cam_index = 0;
+ const char *hub_name;
+ int ret, xcc_id = 0;
+ uint32_t status = 0;
+ u64 addr;
+
+ node_id = entry->node_id;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (entry->src_id == UTCL2_1_0__SRCID__RETRY) {
+ retry_fault = true;
+ write_fault = !!(entry->src_data[1] & 0x200000);
+ }
+
+ if (entry->client_id == SOC21_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0";
+ vmhub = AMDGPU_MMHUB0(node_id / 4);
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0)
+ xcc_id = 0;
+ }
+ vmhub = xcc_id;
+ }
+
+ hub = &adev->vmhub[vmhub];
+
+ if (retry_fault) {
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ cam_index = entry->src_data[3] & 0x3ff;
+
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ if (ret)
+ return 1;
+ } else {
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault))
+ return 1;
+ }
+ }
+
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
+ if (!printk_ratelimit())
+ return 0;
+
+ dev_err(adev->dev,
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+ retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d\n",
+ addr, entry->client_id);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
+
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+
+ return 0;
+}
static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid)
@@ -382,3 +584,14 @@ void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev)
{
adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs;
}
+
+static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = {
+ .set = gmc_v12_1_vm_fault_interrupt_state,
+ .process = gmc_v12_1_process_interrupt,
+};
+
+void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h
index cc52db3a9908..67ee5f7fd9b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h
@@ -25,5 +25,6 @@
#define __GMC_V12_1_H__
void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev);
+void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index cf640c9d19bf..b91843b2af6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1685,6 +1685,10 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
{
struct kfd_process *p;
u32 cam_index;
+ u32 src_data_idx;
+
+ src_data_idx = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) ?
+ 3 : 2;
if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
@@ -1693,7 +1697,8 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
if (p->gpu_page_fault && !p->debug_trap_enabled) {
if (retry_fault && adev->irq.retry_cam_enabled) {
- cam_index = entry->src_data[2] & 0x3ff;
+ cam_index = entry->src_data[src_data_idx] & 0x3ff;
+
WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
}