diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 43 | 
1 files changed, 29 insertions, 14 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 659b385b27b5..8567d5d77346 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -24,7 +24,6 @@  #include <linux/list.h>  #include "amdgpu.h"  #include "amdgpu_xgmi.h" -#include "amdgpu_smu.h"  #include "amdgpu_ras.h"  #include "soc15.h"  #include "df/df_3_6_offset.h" @@ -217,7 +216,7 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,  	struct drm_device *ddev = dev_get_drvdata(dev);  	struct amdgpu_device *adev = drm_to_adev(ddev); -	return snprintf(buf, PAGE_SIZE, "%llu\n", adev->gmc.xgmi.node_id); +	return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id);  } @@ -246,7 +245,7 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,  	adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); -	return snprintf(buf, PAGE_SIZE, "%u\n", error_count); +	return sysfs_emit(buf, "%u\n", error_count);  } @@ -468,15 +467,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev  } +/* + * NOTE psp_xgmi_node_info.num_hops layout is as follows: + * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved) + * num_hops[5:3] = reserved + * num_hops[2:0] = number of hops + */  int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,  		struct amdgpu_device *peer_adev)  {  	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; +	uint8_t num_hops_mask = 0x7;  	int i;  	for (i = 0 ; i < top->num_nodes; ++i)  		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) -			return top->nodes[i].num_hops; +			return top->nodes[i].num_hops & num_hops_mask;  	return	-EINVAL;  } @@ -492,7 +498,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  	if (!adev->gmc.xgmi.supported)  		return 0; -	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { +	if (!adev->gmc.xgmi.pending_reset && +	    amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {  		ret = psp_xgmi_initialize(&adev->psp);  		if (ret) {  			dev_err(adev->dev, @@ -538,7 +545,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  	task_barrier_add_task(&hive->tb); -	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { +	if (!adev->gmc.xgmi.pending_reset && +	    amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {  		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {  			/* update node list for other device in the hive */  			if (tmp_adev != adev) { @@ -567,7 +575,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  		}  	} -	if (!ret) +	if (!ret && !adev->gmc.xgmi.pending_reset)  		ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);  exit_unlock: @@ -620,7 +628,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)  	return psp_xgmi_terminate(&adev->psp);  } -int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) +static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)  {  	int r;  	struct ras_ih_if ih_info = { @@ -634,7 +642,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)  	    adev->gmc.xgmi.num_physical_nodes == 0)  		return 0; -	amdgpu_xgmi_reset_ras_error_count(adev); +	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);  	if (!adev->gmc.xgmi.ras_if) {  		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); @@ -656,7 +664,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)  	return r;  } -void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) +static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)  {  	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&  			adev->gmc.xgmi.ras_if) { @@ -683,7 +691,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg  	WREG32_PCIE(pcs_status_reg, 0);  } -void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)  {  	uint32_t i; @@ -743,8 +751,8 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,  	return 0;  } -int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, -				      void *ras_error_status) +static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, +					     void *ras_error_status)  {  	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;  	int i; @@ -793,10 +801,17 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,  		break;  	} -	amdgpu_xgmi_reset_ras_error_count(adev); +	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);  	err_data->ue_count += ue_cnt;  	err_data->ce_count += ce_cnt;  	return 0;  } + +const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = { +	.ras_late_init = amdgpu_xgmi_ras_late_init, +	.ras_fini = amdgpu_xgmi_ras_fini, +	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count, +	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count, +}; | 
