diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
58 files changed, 2074 insertions, 737 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 74a8105fd2c0..7777d55275de 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,7 +4,7 @@ config DRM_AMDGPU_SI depends on DRM_AMDGPU help Choose this option if you want to enable experimental support - for SI asics. + for SI (Southern Islands) asics. SI is already supported in radeon. Experimental support for SI in amdgpu will be disabled by default and is still provided by @@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable support for CIK asics. + Choose this option if you want to enable support for CIK (Sea + Islands) asics. CIK is already supported in radeon. Support for CIK in amdgpu will be disabled by default and is still provided by radeon. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index a87e42c2c8dc..c7d0cd15b5ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,7 +93,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index fb9399a999ae..b075845a5328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; @@ -1011,7 +1012,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; @@ -1253,9 +1253,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job* job); -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job); + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2..bcc7ee02e0fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include <linux/platform_device.h> #include <sound/designware_i2s.h> #include <sound/pcm.h> +#include <linux/acpi.h> +#include <linux/dmi.h> #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. turn on acp clock * 3. power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } @@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * @@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data) static int acp_hw_init(void *handle) { int r; - uint64_t acp_base; + u64 acp_base; u32 val = 0; u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; @@ -220,141 +252,210 @@ static int acp_hw_init(void *handle) return -EINVAL; acp_base = adev->rmmio_base; - - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) + if (!adev->acp.acp_genpd) return -ENOMEM; adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd->gpd.power_off = acp_poweroff; adev->acp.acp_genpd->gpd.power_on = acp_poweron; - - adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), - GFP_KERNEL); - - if (adev->acp.acp_cell == NULL) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (adev->acp.acp_res == NULL) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (i2s_pdata == NULL) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; - i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, - ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -546,8 +647,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_powergating_state = acp_set_powergating_state, }; -const struct amdgpu_ip_block_version acp_ip_block = -{ +const struct amdgpu_ip_block_version acp_ip_block = { .type = AMD_IP_BLOCK_TYPE_ACP, .major = 2, .minor = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 567597469a8a..5e53a5293935 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -129,7 +129,14 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, kfd.reset_work); - amdgpu_device_gpu_recover(adev, NULL); + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2fcc6e079769..08997092e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) - -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. This should be @@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - /* * Conservatively round up the allocation requirement to 2 MB * to avoid fragmentation caused by 4K allocations in the tail @@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -208,28 +192,18 @@ release: static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } @@ -401,22 +375,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd); - if (ret) { - pr_err("failed to validate PD\n"); - return ret; - } - vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo); - if (vm->use_cpu_for_update) { - ret = amdgpu_bo_kmap(pd, NULL); - if (ret) { - pr_err("failed to kmap PD, ret=%d\n", ret); - return ret; - } - } - return 0; } @@ -450,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); @@ -1555,16 +1512,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index b212eaa280a7..afe22f83d4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f3ac7912c29c..e2eec985adb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -383,12 +383,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, value = RREG32_PCIE(*pos); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -396,11 +392,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -441,12 +438,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_PCIE(*pos, value); @@ -456,11 +449,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -502,12 +496,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -515,11 +505,12 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -560,12 +551,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_DIDT(*pos >> 2, value); @@ -575,11 +562,12 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -621,12 +609,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -634,11 +618,12 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -679,12 +664,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_SMC(*pos, value); @@ -694,11 +675,12 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -1090,11 +1072,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + if (r) + goto out; amdgpu_gfx_off_ctrl(adev, value ? true : false); @@ -1104,10 +1083,12 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return result; + return r; } @@ -1136,21 +1117,52 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + if (r) + goto out; - r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + r = put_user(value, (u32 *)buf); + if (r) + goto out; result += 4; buf += 4; @@ -1158,10 +1170,12 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return result; + return r; } static const struct file_operations amdgpu_debugfs_regs2_fops = { @@ -1229,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1240,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1253,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 64f37713b270..041bd906449d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5109,7 +5109,8 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) */ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job) + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) { struct list_head device_list, *device_list_handle = NULL; bool job_signaled = false; @@ -5119,9 +5120,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool need_emergency_restart = false; bool audio_suspended = false; int tmp_vram_lost_counter; - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); /* * Special case: RAS triggered and full reset isn't supported @@ -5147,12 +5145,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (hive) mutex_lock(&hive->hive_lock); - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.job = job; - reset_context.hive = hive; - clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - + reset_context->job = job; + reset_context->hive = hive; /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list @@ -5236,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && (job->hw_fence.ops != NULL) && - dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -5245,7 +5238,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context); + r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -5272,7 +5265,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) amdgpu_ras_resume(adev); } else { - r = amdgpu_do_asic_reset(device_list_handle, &reset_context); + r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; } @@ -5292,7 +5285,7 @@ skip_hw_reset: if (amdgpu_gpu_recovery == 2 && !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) amdgpu_device_recheck_guilty_jobs( - tmp_adev, device_list_handle, &reset_context); + tmp_adev, device_list_handle, reset_context); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37234c2998d7..242d1847c4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): @@ -2206,12 +2207,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): @@ -2225,15 +2223,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 97fff4727724..c20922a5af9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1559,6 +1559,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, stime, etime, mode); } +static bool +amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) +{ + struct drm_device *dev = adev_to_drm(adev); + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper || !fb_helper->buffer) + return false; + + if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj) + return false; + + return true; +} + int amdgpu_display_suspend_helper(struct amdgpu_device *adev) { struct drm_device *dev = adev_to_drm(adev); @@ -1594,10 +1609,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); + if (!amdgpu_display_robj_is_fb(adev, robj)) { + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3d139708160..429fcdf28836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. @@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -827,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC @@ -2121,7 +2126,7 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2178,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2461,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2530,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access if device is physically gone */ @@ -2574,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 39597ab807d1..8adeb7469f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,7 +47,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. + * that the relevant GPU caches have been flushed. */ struct amdgpu_fence { @@ -844,7 +844,14 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, reset_work); - amdgpu_device_gpu_recover(adev, NULL); + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 22735790fe50..5071b96be982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_reset.h" static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { @@ -64,7 +65,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - r = amdgpu_device_gpu_recover(ring->adev, job); + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) DRM_ERROR("GPU Recovery Failed: %d\n", r); } else { @@ -125,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -148,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -161,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -195,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..babc0af751c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 103927c48d05..77668c3dae5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,17 +43,6 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) -{ - /* - * Add below quirk on several sienna_cichlid cards to disable - * runtime pm to fix EMI failures. - */ - if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || - ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) - adev->runpm = false; -} - void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + /* enable BACO as runpm mode if runpm=1 */ if (amdgpu_runtime_pm > 0) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; case CHIP_VEGA10: - /* turn runpm on if noretry=0 */ + /* enable BACO as runpm mode if noretry=0 */ if (!adev->gmc.noretry) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: - /* enable runpm on CI+ */ - adev->runpm = true; + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } - amdgpu_runtime_pm_quirk(adev); - - if (adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index bffde4aa6fe7..fe82b8b19a4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -114,8 +114,14 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) size_t doorbell_start_offset; size_t doorbell_aperture_size; size_t doorbell_process_limit; + size_t aggregated_doorbell_start; + int i; - doorbell_start_offset = (adev->doorbell_index.max_assignment+1) * sizeof(u32); + aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); + aggregated_doorbell_start = + roundup(aggregated_doorbell_start, PAGE_SIZE); + + doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; doorbell_start_offset = roundup(doorbell_start_offset, amdgpu_mes_doorbell_process_slice(adev)); @@ -135,6 +141,11 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); adev->mes.max_doorbell_slices = doorbell_process_limit; + /* allocate Qword range for aggregated doorbell */ + for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) + adev->mes.aggregated_doorbells[i] = + aggregated_doorbell_start / sizeof(u32) + i * 2; + DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit); return 0; } @@ -150,6 +161,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); + spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; @@ -173,9 +185,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) - adev->mes.agreegated_doorbells[i] = 0xffffffff; - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); if (r) { dev_err(adev->dev, @@ -716,6 +725,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue->queue_type = qprops->queue_type; queue->paging = qprops->paging; queue->gang = gang; + queue->ring->mqd_ptr = queue->mqd_cpu_ptr; list_add_tail(&queue->list, &gang->queue_list); amdgpu_mes_unlock(&adev->mes); @@ -794,8 +804,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct mes_unmap_legacy_queue_input queue_input; int r; - amdgpu_mes_lock(&adev->mes); - queue_input.action = action; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; @@ -808,7 +816,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, if (r) DRM_ERROR("failed to unmap legacy queue\n"); - amdgpu_mes_unlock(&adev->mes); return r; } @@ -817,8 +824,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) struct mes_misc_op_input op_input; int r, val = 0; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_READ_REG; op_input.read_reg.reg_offset = reg; op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; @@ -835,7 +840,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) val = *(adev->mes.read_val_ptr); error: - amdgpu_mes_unlock(&adev->mes); return val; } @@ -845,8 +849,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRITE_REG; op_input.write_reg.reg_offset = reg; op_input.write_reg.reg_value = val; @@ -862,7 +864,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, DRM_ERROR("failed to write reg (0x%x)\n", reg); error: - amdgpu_mes_unlock(&adev->mes); return r; } @@ -873,8 +874,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; op_input.wrm_reg.reg0 = reg0; op_input.wrm_reg.reg1 = reg1; @@ -892,7 +891,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, DRM_ERROR("failed to reg_write_reg_wait\n"); error: - amdgpu_mes_unlock(&adev->mes); return r; } @@ -902,8 +900,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRM_REG_WAIT; op_input.wrm_reg.reg0 = reg; op_input.wrm_reg.ref = val; @@ -920,7 +916,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, DRM_ERROR("failed to reg_write_reg_wait\n"); error: - amdgpu_mes_unlock(&adev->mes); return r; } @@ -1087,6 +1082,12 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, kfree(ring); } +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum amdgpu_mes_priority_level prio) +{ + return adev->mes.aggregated_doorbells[prio]; +} + int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data) { @@ -1188,6 +1189,63 @@ error: return r; } +int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, + struct amdgpu_mes_ctx_data *ctx_data) +{ + struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; + struct amdgpu_bo *bo = ctx_data->meta_data_obj; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_bo_list_entry vm_pd; + struct list_head list, duplicates; + struct dma_fence *fence = NULL; + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + long r = 0; + + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); + + tv.bo = &bo->tbo; + tv.num_shared = 2; + list_add(&tv.head, &list); + + amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); + + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); + if (r) { + dev_err(adev->dev, "leaking bo va because " + "we fail to reserve bo (%ld)\n", r); + return r; + } + + amdgpu_vm_bo_del(adev, bo_va); + if (!amdgpu_vm_ready(vm)) + goto out_unlock; + + r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); + if (r) + goto out_unlock; + if (fence) { + amdgpu_bo_fence(bo, fence, true); + fence = NULL; + } + + r = amdgpu_vm_clear_freed(adev, vm, &fence); + if (r || !fence) + goto out_unlock; + + dma_fence_wait(fence, false); + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + +out_unlock: + if (unlikely(r < 0)) + dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); + ttm_eu_backoff_reservation(&ticket, &list); + + return r; +} + static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, int pasid, int *gang_id, int queue_type, int num_queue, @@ -1294,7 +1352,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); if (r) { DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_pasid; + goto error_fini; } ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; @@ -1349,9 +1407,9 @@ error_queues: amdgpu_mes_destroy_process(adev, pasid); error_vm: - BUG_ON(amdgpu_bo_reserve(ctx_data.meta_data_obj, true)); - amdgpu_vm_bo_del(adev, ctx_data.meta_data_va); - amdgpu_bo_unreserve(ctx_data.meta_data_obj); + amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); + +error_fini: amdgpu_vm_fini(adev, vm); error_pasid: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 3cec87e023b3..7b46f6bf4187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -83,6 +83,7 @@ struct amdgpu_mes { uint64_t default_gang_quantum; struct amdgpu_ring ring; + spinlock_t ring_lock; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -112,7 +113,7 @@ struct amdgpu_mes { uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; - uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; + uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs; uint64_t sch_ctx_gpu_addr; uint64_t *sch_ctx_ptr; @@ -346,12 +347,17 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, void amdgpu_mes_remove_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring); +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum amdgpu_mes_priority_level prio); + int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data); void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_mes_ctx_data *ctx_data); +int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, + struct amdgpu_mes_ctx_data *ctx_data); int amdgpu_mes_self_test(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 677d1dfab37f..e6a9b9fc9e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (WARN_ON_ONCE(min_offset > max_offset)) return -EINVAL; + /* Check domain to be pinned to against preferred domains */ + if (bo->preferred_domains & domain) + domain = bo->preferred_domains & domain; + /* A shared bo cannot be migrated to VRAM */ if (bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9411c28d88b..3ee363bfbac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; @@ -2348,6 +2363,13 @@ static int psp_load_smu_fw(struct psp_context *psp) &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras_context.ras; + /* + * Skip SMU FW reloading in case of using BACO for runpm only, + * as SMU is always alive. + */ + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e431f4994931..180634616b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -69,8 +69,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, - PSP_BL__LOAD_INTFDRV = 0xC0000, - PSP_BL__LOAD_DBGDRV = 0xD0000, + PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 285534bfc084..ff5361f5c2d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -717,27 +717,30 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!con) return -EINVAL; - info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); - if (!info) - return -ENOMEM; - - if (!enable) { - info->disable_features = (struct ta_ras_disable_features_input) { - .block_id = amdgpu_ras_block_to_ta(head->block), - .error_type = amdgpu_ras_error_to_ta(head->type), - }; - } else { - info->enable_features = (struct ta_ras_enable_features_input) { - .block_id = amdgpu_ras_block_to_ta(head->block), - .error_type = amdgpu_ras_error_to_ta(head->type), - }; + if (head->block == AMDGPU_RAS_BLOCK__GFX) { + info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (!enable) { + info->disable_features = (struct ta_ras_disable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } else { + info->enable_features = (struct ta_ras_enable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } } /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Only enable ras feature operation handle on host side */ - if (!amdgpu_sriov_vf(adev) && + if (head->block == AMDGPU_RAS_BLOCK__GFX && + !amdgpu_sriov_vf(adev) && !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { @@ -753,7 +756,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, __amdgpu_ras_feature_enable(adev, head, enable); ret = 0; out: - kfree(info); + if (head->block == AMDGPU_RAS_BLOCK__GFX) + kfree(info); return ret; } @@ -1938,8 +1942,16 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_put_xgmi_hive(hive); } - if (amdgpu_device_should_recover_gpu(ras->adev)) - amdgpu_device_gpu_recover(ras->adev, NULL); + if (amdgpu_device_should_recover_gpu(ras->adev)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); + } atomic_set(&ras->in_recovery, 0); } @@ -2150,7 +2162,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) bool exc_err_limit = false; int ret; - if (!con) + if (!con || amdgpu_sriov_vf(adev)) return 0; /* Allow access to RAS EEPROM via debugfs, when the ASIC diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac..03ac36b2c2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 +237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index c312577df596..939c8614f0e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab..ebed3f5226db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index f4b5301ea2a0..a60cb6d84b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -496,7 +496,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8530befb2051..59cac347baa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2168,6 +2168,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) } else { vm->update_funcs = &amdgpu_vm_sdma_funcs; } + /* + * Make sure root PD gets mapped. As vm_update_mode could be changed + * when turning a GFX VM into a compute VM. + */ + r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo)); + if (r) + goto unreserve_bo; + dma_fence_put(vm->last_update); vm->last_update = NULL; vm->is_compute_context = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7a5e8a7b4a1b..28ec5f8ac1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -395,11 +395,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, unsigned long pages_per_block; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = (u64)place->lpfn << PAGE_SHIFT; if (!lpfn) lpfn = man->size; - fpfn = place->fpfn << PAGE_SHIFT; + fpfn = (u64)place->fpfn << PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) @@ -439,12 +439,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - remaining_size = vres->base.num_pages << PAGE_SHIFT; + remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; mutex_lock(&mgr->lock); while (remaining_size) { if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; + min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT; else min_block_size = mgr->default_page_size; @@ -453,12 +453,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Limit maximum size to 2GiB due to SG table limitations */ size = min(remaining_size, 2ULL << 30); - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; + if (size >= (u64)pages_per_block << PAGE_SHIFT) + min_block_size = (u64)pages_per_block << PAGE_SHIFT; cur_size = size; - if (fpfn + size != place->lpfn << PAGE_SHIFT) { + if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { /* * Except for actual range allocation, modify the size and * min_block_size conforming to continuous flag enablement @@ -498,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, LIST_HEAD(temp); trim_list = &vres->blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; + original_size = (u64)vres->base.num_pages << PAGE_SHIFT; /* * If size value is rounded up to min_block_size, trim the last diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 4b267bf1c5db..0e04e42cf809 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -50,7 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) { - return PAGE_SIZE << drm_buddy_block_order(block); + return (u64)PAGE_SIZE << drm_buddy_block_order(block); } static inline struct amdgpu_vram_mgr_resource * diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..9c964cd3b5d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..e0ad9f27dc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 982855e6cf52..77f5e998a120 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 84440741c60b..802e5c753271 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index abf2bf7f1a79..fafbad3cf08d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -8525,14 +8576,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); + } } } @@ -8557,13 +8639,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - BUG(); /* only DOORBELL method supported on gfx10 now */ + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx10 now */ + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 942d41a65f2f..0d8193b30fc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -126,6 +126,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -4743,63 +4745,143 @@ static int gfx_v11_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0; u32 tmp; + int i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* GRBM_STATUS */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | - GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | - GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) { - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_CP, - 1); - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_GFX, - 1); - } - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_CP, - 1); - } - - /* GRBM_STATUS2 */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2); - if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, - SOFT_RESET_RLC, - 1); - - if (grbm_soft_reset) { - /* stop the rlc */ - gfx_v11_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - gfx_v11_0_cp_gfx_enable(adev, false); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); + WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - /* Disable MEC parsing/prefetching */ - gfx_v11_0_cp_compute_enable(adev, false); + gfx_v11_0_set_safe_mode(adev); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + } + } + } + for (i = 0; i < adev->gfx.me.num_me; ++i) { + for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); - udelay(50); + WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); + } + } + } + + WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + // Read CP_VMID_RESET register three times. + // to get sufficient time for GFX_HQD_ACTIVE reach 0 + RREG32_SOC15(GC, 0, regCP_VMID_RESET); + RREG32_SOC15(GC, 0, regCP_VMID_RESET); + RREG32_SOC15(GC, 0, regCP_VMID_RESET); - /* Wait a little for things to settle down */ - udelay(50); + for (i = 0; i < adev->usec_timeout; i++) { + if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && + !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) + break; + udelay(1); } - return 0; + if (i >= adev->usec_timeout) { + printk("Failed to wait all pipes clean\n"); + return -EINVAL; + } + + /********** trigger soft reset ***********/ + grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_GFX, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPF, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPC, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPG, 1); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); + /********** exit soft reset ***********/ + grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CP, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_GFX, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPF, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPC, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPG, 0); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); + + tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); + tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); + WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); + + for (i = 0; i < adev->usec_timeout; i++) { + if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + printk("Failed to wait CP_VMID_RESET to 0\n"); + return -EINVAL; + } + + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); + + gfx_v11_0_unset_safe_mode(adev); + + return gfx_v11_0_cp_resume(adev); +} + +static bool gfx_v11_0_check_soft_reset(void *handle) +{ + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; } static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) @@ -5297,14 +5379,45 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); + } } } @@ -5329,13 +5442,42 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx11 now */ + } } } @@ -6132,6 +6274,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .is_idle = gfx_v11_0_is_idle, .wait_for_idle = gfx_v11_0_wait_for_idle, .soft_reset = gfx_v11_0_soft_reset, + .check_soft_reset = gfx_v11_0_check_soft_reset, .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..c6e0f9313a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 25d5743ae91b..9ae8cdaa033e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -834,10 +837,21 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ - if (amdgpu_gart_size == -1) - adev->gmc.gart_size = 512ULL << 20; - else + if (amdgpu_gart_size == -1) { + switch (adev->ip_versions[GC_HWIP][0]) { + default: + adev->gmc.gart_size = 512ULL << 20; + break; + case IP_VERSION(10, 3, 1): /* DCE SG support */ + case IP_VERSION(10, 3, 3): /* DCE SG support */ + case IP_VERSION(10, 3, 6): /* DCE SG support */ + case IP_VERSION(10, 3, 7): /* DCE SG support */ + adev->gmc.gart_size = 1024ULL << 20; + break; + } + } else { adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v10_0_vram_gtt_location(adev, &adev->gmc); @@ -969,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index edbdc0b934ea..1471bfb9ae38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,10 +22,13 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include "umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -537,11 +540,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not define special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } @@ -750,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 18a129f36215..0082e2e1e0b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -87,21 +87,32 @@ static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = { }; static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -166,13 +177,9 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, @@ -189,13 +196,9 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -227,13 +230,9 @@ static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, @@ -258,13 +257,9 @@ static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) @@ -299,7 +294,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->agreegated_doorbells[i]; + mes->aggregated_doorbells[i]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -313,13 +308,63 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data); } static const struct amdgpu_mes_funcs mes_v10_1_funcs = { @@ -1121,6 +1166,8 @@ static int mes_v10_1_hw_init(void *handle) if (r) goto failure; + mes_v10_1_init_aggregated_doorbell(&adev->mes); + r = mes_v10_1_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1133,6 +1180,7 @@ static int mes_v10_1_hw_init(void *handle) * with MES enabled. */ adev->gfx.kiq.ring.sched.ready = false; + adev->mes.ring.sched.ready = true; return 0; @@ -1145,6 +1193,8 @@ static int mes_v10_1_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->mes.ring.sched.ready = false; + mes_v10_1_enable(adev, false); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5bdc2babb070..777f9268d92d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -86,21 +86,32 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { }; static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -173,13 +184,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, @@ -196,13 +203,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -216,7 +219,7 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset << 2; + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = 0; mes_remove_queue_pkt.pipe_id = input->pipe_id; @@ -228,19 +231,14 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.tf_data = lower_32_bits(input->trail_fence_data); } else { - if (input->queue_type == AMDGPU_RING_TYPE_GFX) - mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1; - else - mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; + mes_remove_queue_pkt.unmap_legacy_queue = 1; + mes_remove_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, @@ -265,13 +263,9 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } static int mes_v11_0_misc_op(struct amdgpu_mes *mes, @@ -317,13 +311,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - misc_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - misc_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &misc_pkt, sizeof(misc_pkt)); + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); } static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) @@ -358,7 +348,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->agreegated_doorbells[i]; + mes->aggregated_doorbells[i]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -373,13 +363,63 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); } static const struct amdgpu_mes_funcs mes_v11_0_funcs = { @@ -1181,6 +1221,8 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + mes_v11_0_init_aggregated_doorbell(&adev->mes); + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1204,6 +1246,9 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mes.ring.sched.ready = false; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 7ec5b5cf4bb9..12906ba74462 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -283,8 +283,16 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || - adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover(adev, NULL); + adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index e18b75c8fde6..e07757eea7ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -310,8 +310,16 @@ flr_done: adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || - adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover(adev, NULL); + adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index c5016a926331..288c414babdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -522,8 +522,16 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover(adev, NULL); + if (amdgpu_device_should_recover_gpu(adev)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 34c610b9157d..b465baa26762 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a..22c775f39119 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1f9021f896a1..a019ac92edb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -389,34 +389,67 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr << 2) == 0x%08x " - "upper_32_bits(ring->wptr << 2) == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + *wptr_saved = ring->wptr << 2; + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, + ring->wptr << 2); + } } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 8cfaed55b192..0200cb3a31a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,7 @@ static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v6_0_start(struct amdgpu_device *adev); static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -245,34 +246,68 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ + + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + *wptr_saved = ring->wptr << 2; + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, + ring->wptr << 2); + } } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } } @@ -771,32 +806,54 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev) static int sdma_v6_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset; u32 tmp; int i; + sdma_v6_0_gfx_stop(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; + tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE)); + tmp |= SDMA0_FREEZE__FREEZE_MASK; + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp); + tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK; + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0); + + udelay(100); + + tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i; WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - udelay(50); + udelay(100); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0); tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - udelay(50); + udelay(100); } - return 0; + return sdma_v6_0_start(adev); +} + +static bool sdma_v6_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; } /** @@ -830,7 +887,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) msleep(1000); } - sdma_v6_0_soft_reset(adev); /* unhalt the MEs */ sdma_v6_0_enable(adev, true); /* enable sdma ring preemption */ @@ -1526,6 +1582,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .is_idle = sdma_v6_0_is_idle, .wait_for_idle = sdma_v6_0_wait_for_idle, .soft_reset = sdma_v6_0_soft_reset, + .check_soft_reset = sdma_v6_0_check_soft_reset, .set_clockgating_state = sdma_v6_0_set_clockgating_state, .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 495848515edf..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; @@ -417,7 +418,13 @@ static uint32_t soc21_get_rev_id(struct amdgpu_device *adev) static bool soc21_need_full_reset(struct amdgpu_device *adev) { - return true; + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + return false; + default: + return true; + } } static bool soc21_need_reset_on_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 000000000000..36a2053f2e8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. + */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 000000000000..849ede88e111 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC regiser per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 84ac2401895a..a91ffbf902d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" @@ -44,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H246, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + uint32_t val; + int r = 0; + + /* The first instance can decode anything */ + if (!ring->me) + return r; + + /* unified queue ib header has 8 double words. */ + if (ib->length_dw < 8) + return r; + + val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + + if (decode_buffer->valid_buf_flag & 0x1) + r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo); + } + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + |