Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c	224
1 file changed, 213 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index fe1a39ffda72..c39ed9eb0987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -32,6 +32,59 @@
 #include "amdgpu_xgmi.h"
 
 /**
+ * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for pdb0 and map it for CPU access
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
+{
+	int r;
+	struct amdgpu_bo_param bp;
+	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+	uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
+	uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
+
+	memset(&bp, 0, sizeof(bp));
+	bp.size = PAGE_ALIGN((npdes + 1) * 8);
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+	r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
+	if (unlikely(r != 0))
+		goto bo_reserve_failure;
+
+	r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
+	if (r)
+		goto bo_pin_failure;
+	r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
+	if (r)
+		goto bo_kmap_failure;
+
+	amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+	return 0;
+
+bo_kmap_failure:
+	amdgpu_bo_unpin(adev->gmc.pdb0_bo);
+bo_pin_failure:
+	amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+bo_reserve_failure:
+	amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+	return r;
+}
+
+/**
  * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
  *
  * @bo: the BO to get the PDE for
@@ -158,6 +211,39 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
 			mc->vram_end, mc->real_vram_size >> 20);
 }
 
+/** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * This function is only used if use GART for FB translation. In such
+ * case, we use sysvm aperture (vmid0 page tables) for both vram
+ * and gart (aka system memory) access.
+ *
+ * GPUVM (and our organization of vmid0 page tables) require sysvm
+ * aperture to be placed at a location aligned with 8 times of native
+ * page size. For example, if vm_context0_cntl.page_table_block_size
+ * is 12, then native page size is 8G (2M*2^12), sysvm should start
+ * with a 64G aligned address. For simplicity, we just put sysvm at
+ * address 0. So vram start at address 0 and gart is right after vram.
+ */
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+	u64 hive_vram_start = 0;
+	u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
+	mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+	mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+	mc->gart_start = hive_vram_end + 1;
+	mc->gart_end = mc->gart_start + mc->gart_size - 1;
+	mc->fb_start = hive_vram_start;
+	mc->fb_end = hive_vram_end;
+	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+			mc->mc_vram_size >> 20, mc->vram_start,
+			mc->vram_end, mc->real_vram_size >> 20);
+	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
 /**
  * amdgpu_gmc_gart_location - try to find GART location
  *
@@ -165,7 +251,6 @@ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
  * @mc: memory controller structure holding memory information
  *
  * Function will place try to place GART before or after VRAM.
- *
  * If GART size is bigger than space left then we ajust GART size.
  * Thus function will never fails.
  */
@@ -176,8 +261,6 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
 	/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
 	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
 
-	mc->gart_size += adev->pm.smu_prv_buffer_size;
-
 	/* VCE doesn't like it when BOs cross a 4GB segment, so align
 	 * the GART base on a 4GB boundary as well.
 	 */
@@ -308,26 +391,46 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
-		r = adev->umc.funcs->ras_late_init(adev);
+	if (adev->umc.ras_funcs &&
+	    adev->umc.ras_funcs->ras_late_init) {
+		r = adev->umc.ras_funcs->ras_late_init(adev);
 		if (r)
 			return r;
 	}
 
-	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
-		r = adev->mmhub.funcs->ras_late_init(adev);
+	if (adev->mmhub.ras_funcs &&
+	    adev->mmhub.ras_funcs->ras_late_init) {
+		r = adev->mmhub.ras_funcs->ras_late_init(adev);
 		if (r)
 			return r;
 	}
 
-	return amdgpu_xgmi_ras_late_init(adev);
+	if (!adev->gmc.xgmi.connected_to_cpu)
+		adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+
+	if (adev->gmc.xgmi.ras_funcs &&
+	    adev->gmc.xgmi.ras_funcs->ras_late_init) {
+		r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+		if (r)
+			return r;
+	}
+
+	return 0;
 }
 
 void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 {
-	amdgpu_umc_ras_fini(adev);
-	amdgpu_mmhub_ras_fini(adev);
-	amdgpu_xgmi_ras_fini(adev);
+	if (adev->umc.ras_funcs &&
+	    adev->umc.ras_funcs->ras_fini)
+		adev->umc.ras_funcs->ras_fini(adev);
+
+	if (adev->mmhub.ras_funcs &&
+	    adev->mmhub.ras_funcs->ras_fini)
+		adev->mmhub.ras_funcs->ras_fini(adev);
+
+	if (adev->gmc.xgmi.ras_funcs &&
+	    adev->gmc.xgmi.ras_funcs->ras_fini)
+		adev->gmc.xgmi.ras_funcs->ras_fini(adev);
 }
 
 	/*
@@ -385,6 +488,16 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
+		if (amdgpu_tmz == 0) {
+			adev->gmc.tmz_enabled = false;
+			dev_info(adev->dev,
+				 "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
+		} else {
+			adev->gmc.tmz_enabled = true;
+			dev_info(adev->dev,
+				 "Trusted Memory Zone (TMZ) feature enabled\n");
+		}
+		break;
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
@@ -423,6 +536,8 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA20:
+	case CHIP_ARCTURUS:
+	case CHIP_ALDEBARAN:
 		/*
 		 * noretry = 0 will cause kfd page fault tests fail
 		 * for some ASICs, so set default to 1 for these ASICs.
@@ -518,3 +633,90 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
 		adev->mman.stolen_extended_size = 0;
 	}
 }
+
+/**
+ * amdgpu_gmc_init_pdb0 - initialize PDB0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This function is only used when GART page table is used
+ * for FB address translatioin. In such a case, we construct
+ * a 2-level system VM page table: PDB0->PTB, to cover both
+ * VRAM of the hive and system memory.
+ *
+ * PDB0 is static, initialized once on driver initialization.
+ * The first n entries of PDB0 are used as PTE by setting
+ * P bit to 1, pointing to VRAM. The n+1'th entry points
+ * to a big PTB covering system memory.
+ *
+ */
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
+{
+	int i;
+	uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
+	/* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
+	 */
+	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+	u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
+	u64 vram_addr = adev->vm_manager.vram_base_offset -
+		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+	u64 vram_end = vram_addr + vram_size;
+	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+
+	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+	flags |= AMDGPU_PTE_WRITEABLE;
+	flags |= AMDGPU_PTE_SNOOPED;
+	flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
+	flags |= AMDGPU_PDE_PTE;
+
+	/* The first n PDE0 entries are used as PTE,
+	 * pointing to vram
+	 */
+	for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
+		amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+
+	/* The n+1'th PDE0 entry points to a huge
+	 * PTB who has more than 512 entries each
+	 * pointing to a 4K system page
+	 */
+	flags = AMDGPU_PTE_VALID;
+	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+	/* Requires gart_ptb_gpu_pa to be 4K aligned */
+	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+}
+
+/**
+ * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
+ * address
+ *
+ * @adev: amdgpu_device pointer
+ * @mc_addr: MC address of buffer
+ */
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
+{
+	return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
+}
+
+/**
+ * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
+ * GPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
+}
+
+/**
+ * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
+ * from CPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+	return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
+}
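
The sizing logic in amdgpu_gmc_pdb0_alloc() and amdgpu_gmc_init_pdb0() above reduces to a few shifts: each PDB0 entry used directly as a PTE maps 2M << vmid0_page_table_block_size bytes of hive VRAM, and one extra entry is reserved for the PDE pointing at the GART PTB. The standalone sketch below only illustrates that arithmetic together with the VRAM/GART layout chosen by amdgpu_gmc_sysvm_location(); the node count, per-node segment size, node id and block size are hypothetical example values, not taken from the patch.

/*
 * Standalone sketch (not part of the patch) of the sysvm layout and PDB0
 * sizing math used by amdgpu_gmc_sysvm_location(), amdgpu_gmc_pdb0_alloc()
 * and amdgpu_gmc_init_pdb0(). All input values are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t node_segment_size = 4ULL << 30; /* per-node VRAM, example value */
	uint32_t num_physical_nodes = 8;         /* nodes in the hive, example value */
	uint32_t physical_node_id = 2;           /* this node's slot, example value */
	uint32_t block_size = 9;                 /* vmid0_page_table_block_size */

	/* sysvm aperture: hive VRAM starts at address 0, GART follows it. */
	uint64_t hive_vram_size = node_segment_size * num_physical_nodes;
	uint64_t vram_start = node_segment_size * physical_node_id;
	uint64_t gart_start = hive_vram_size; /* hive_vram_end + 1 */

	/* Each PDE0 entry used as a PTE maps 2M << block_size bytes of VRAM. */
	uint32_t pde0_page_shift = block_size + 21;
	uint64_t pde0_page_size = 1ULL << pde0_page_shift;

	/* Entries needed to cover the hive's VRAM, rounded up, plus one PDE
	 * for the GART PTB; each entry is 8 bytes. */
	uint64_t npdes = (hive_vram_size + pde0_page_size - 1) >> pde0_page_shift;
	uint64_t pdb0_bytes = (npdes + 1) * 8;

	printf("node VRAM window: 0x%016llx\n", (unsigned long long)vram_start);
	printf("GART base:        0x%016llx\n", (unsigned long long)gart_start);
	printf("PDE0 page size:   %llu MiB\n",
	       (unsigned long long)(pde0_page_size >> 20));
	printf("PDB0: %llu VRAM entries + 1 GART entry = %llu bytes\n",
	       (unsigned long long)npdes, (unsigned long long)pdb0_bytes);
	return 0;
}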
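The three helpers added at the end of the patch are plain offset arithmetic between the MC (GPU virtual) view, the GPU physical view and the CPU BAR view of VRAM: subtract mc->vram_start, then add either vram_base_offset or aper_base. A minimal userspace illustration of the same conversions follows; every base address and the buffer offset below are invented example values.

/*
 * Standalone illustration (not the driver code) of the conversions done by
 * amdgpu_gmc_vram_mc2pa() and amdgpu_gmc_vram_cpu_pa(). All values invented.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t vram_start       = 0x0000000000000000ULL; /* mc->vram_start */
	uint64_t vram_base_offset = 0x0000008000000000ULL; /* vm_manager.vram_base_offset */
	uint64_t aper_base        = 0x0000007c00000000ULL; /* gmc.aper_base (PCI BAR) */
	uint64_t bo_mc_addr       = 0x0000000000200000ULL; /* amdgpu_bo_gpu_offset(bo) */

	/* GPU physical address: rebase the MC address onto the VRAM bus offset. */
	uint64_t pa = bo_mc_addr - vram_start + vram_base_offset;

	/* CPU physical address: rebase the same MC offset onto the BAR aperture. */
	uint64_t cpu_pa = bo_mc_addr - vram_start + aper_base;

	printf("GPU physical address: 0x%016llx\n", (unsigned long long)pa);
	printf("CPU physical address: 0x%016llx\n", (unsigned long long)cpu_pa);
	return 0;
}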
