summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c105
1 files changed, 87 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 765b58a17dc7..3322a443dfb2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+};
+
+static const struct kfd_device_info vegam_device_info = {
+ .asic_family = CHIP_VEGAM,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 4,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -282,6 +313,24 @@ static const struct kfd_device_info vega20_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 8,
+};
+
+static const struct kfd_device_info navi10_device_info = {
+ .asic_family = CHIP_NAVI10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .needs_iommu_device = false,
+ .supports_cwsr = true,
+ .needs_pci_atomics = false,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
@@ -373,6 +422,9 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
+ { 0x694C, &vegam_device_info }, /* VegaM */
+ { 0x694E, &vegam_device_info }, /* VegaM */
+ { 0x694F, &vegam_device_info }, /* VegaM */
{ 0x6860, &vega10_device_info }, /* Vega10 */
{ 0x6861, &vega10_device_info }, /* Vega10 */
{ 0x6862, &vega10_device_info }, /* Vega10 */
@@ -399,7 +451,13 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x66a3, &vega20_device_info }, /* Vega20 */
{ 0x66a4, &vega20_device_info }, /* Vega20 */
{ 0x66a7, &vega20_device_info }, /* Vega20 */
- { 0x66af, &vega20_device_info } /* Vega20 */
+ { 0x66af, &vega20_device_info }, /* Vega20 */
+ /* Navi10 */
+ { 0x7310, &navi10_device_info }, /* Navi10 */
+ { 0x7312, &navi10_device_info }, /* Navi10 */
+ { 0x7318, &navi10_device_info }, /* Navi10 */
+ { 0x731a, &navi10_device_info }, /* Navi10 */
+ { 0x731f, &navi10_device_info }, /* Navi10 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -429,7 +487,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
- int ret;
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
@@ -446,17 +503,15 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
* 32 and 64-bit requests are possible and must be
* supported.
*/
- ret = pci_enable_atomic_ops_to_root(pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- if (device_info->needs_pci_atomics && ret < 0) {
+ kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
+ if (device_info->needs_pci_atomics &&
+ !kfd->pci_atomic_requested) {
dev_info(kfd_device,
"skipped device %x:%x, PCI rejects atomics\n",
pdev->vendor, pdev->device);
kfree(kfd);
return NULL;
- } else if (!ret)
- kfd->pci_atomic_requested = true;
+ }
kfd->kgd = kgd;
kfd->device_info = device_info;
@@ -481,10 +536,14 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
- } else {
+ } else if (kfd->device_info->asic_family < CHIP_NAVI10) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx10_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
}
kfd->cwsr_enabled = true;
@@ -518,6 +577,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
+ /* Allocate global GWS that is shared by all KFD processes */
+ if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
+ amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
+ dev_err(kfd_device, "Could not allocate %d gws\n",
+ amdgpu_amdkfd_get_num_gws(kfd->kgd));
+ goto out;
+ }
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@@ -541,7 +607,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
- goto out;
+ goto alloc_gtt_mem_failure;
}
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -561,11 +627,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (kfd->kfd2kgd->get_hive_id)
kfd->hive_id = kfd->kfd2kgd->get_hive_id(kfd->kgd);
- if (kfd_topology_add_device(kfd)) {
- dev_err(kfd_device, "Error adding device to topology\n");
- goto kfd_topology_add_device_error;
- }
-
if (kfd_interrupt_init(kfd)) {
dev_err(kfd_device, "Error initializing interrupts\n");
goto kfd_interrupt_error;
@@ -589,6 +650,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->dbgmgr = NULL;
+ if (kfd_topology_add_device(kfd)) {
+ dev_err(kfd_device, "Error adding device to topology\n");
+ goto kfd_topology_add_device_error;
+ }
+
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
kfd->pdev->device);
@@ -598,19 +664,21 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto out;
+kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
kfd_interrupt_exit(kfd);
kfd_interrupt_error:
- kfd_topology_remove_device(kfd);
-kfd_topology_add_device_error:
kfd_doorbell_fini(kfd);
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+alloc_gtt_mem_failure:
+ if (hws_gws_support)
+ amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -628,6 +696,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ if (hws_gws_support)
+ amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
}
kfree(kfd);
@@ -665,7 +735,6 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
if (ret)
return ret;
count = atomic_dec_return(&kfd_locked);
- WARN_ONCE(count != 0, "KFD reset ref. error");
atomic_set(&kfd->sram_ecc_flag, 0);