diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 356 |
1 files changed, 255 insertions, 101 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8fa33a08de3..3346699960dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include <linux/amd-iommu.h> +#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> @@ -28,9 +30,12 @@ #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler_gfx8.asm" +#include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 +static atomic_t kfd_device_suspended = ATOMIC_INIT(0); +#ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, @@ -41,6 +46,8 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = true, + .needs_pci_atomics = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -53,15 +60,125 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = true, + .needs_pci_atomics = false, }; +#endif + +static const struct kfd_device_info hawaii_device_info = { + .asic_family = CHIP_HAWAII, + .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info tonga_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info tonga_vf_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info fiji_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info fiji_vf_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + + +static const struct kfd_device_info polaris10_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info polaris10_vf_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info polaris11_device_info = { + .asic_family = CHIP_POLARIS11, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, +}; + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; }; -/* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 { 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */ { 0x1306, &kaveri_device_info }, /* Kaveri */ @@ -88,7 +205,51 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9874, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info } /* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ +#endif + { 0x67A0, &hawaii_device_info }, /* Hawaii */ + { 0x67A1, &hawaii_device_info }, /* Hawaii */ + { 0x67A2, &hawaii_device_info }, /* Hawaii */ + { 0x67A8, &hawaii_device_info }, /* Hawaii */ + { 0x67A9, &hawaii_device_info }, /* Hawaii */ + { 0x67AA, &hawaii_device_info }, /* Hawaii */ + { 0x67B0, &hawaii_device_info }, /* Hawaii */ + { 0x67B1, &hawaii_device_info }, /* Hawaii */ + { 0x67B8, &hawaii_device_info }, /* Hawaii */ + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ + { 0x6920, &tonga_device_info }, /* Tonga */ + { 0x6921, &tonga_device_info }, /* Tonga */ + { 0x6928, &tonga_device_info }, /* Tonga */ + { 0x6929, &tonga_device_info }, /* Tonga */ + { 0x692B, &tonga_device_info }, /* Tonga */ + { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ + { 0x6938, &tonga_device_info }, /* Tonga */ + { 0x6939, &tonga_device_info }, /* Tonga */ + { 0x7300, &fiji_device_info }, /* Fiji */ + { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ + { 0x67C0, &polaris10_device_info }, /* Polaris10 */ + { 0x67C1, &polaris10_device_info }, /* Polaris10 */ + { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ + { 0x67C8, &polaris10_device_info }, /* Polaris10 */ + { 0x67C9, &polaris10_device_info }, /* Polaris10 */ + { 0x67CA, &polaris10_device_info }, /* Polaris10 */ + { 0x67CC, &polaris10_device_info }, /* Polaris10 */ + { 0x67CF, &polaris10_device_info }, /* Polaris10 */ + { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x67E0, &polaris11_device_info }, /* Polaris11 */ + { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ + { 0x67E7, &polaris11_device_info }, /* Polaris11 */ + { 0x67E8, &polaris11_device_info }, /* Polaris11 */ + { 0x67E9, &polaris11_device_info }, /* Polaris11 */ + { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -127,6 +288,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } + if (device_info->needs_pci_atomics) { + /* Allow BIF to recode atomics to PCIe 3.0 + * AtomicOps. 32 and 64-bit requests are possible and + * must be supported. + */ + if (pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; + } + } + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) return NULL; @@ -144,77 +320,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return kfd; } -static bool device_iommu_pasid_init(struct kfd_dev *kfd) -{ - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - - struct amd_iommu_device_info iommu_info; - unsigned int pasid_limit; - int err; - - err = amd_iommu_device_info(kfd->pdev, &iommu_info); - if (err < 0) { - dev_err(kfd_device, - "error getting iommu info. is the iommu enabled?\n"); - return false; - } - - if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) - != 0); - return false; - } - - pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), - iommu_info.max_pasids); - - if (!kfd_set_pasid_limit(pasid_limit)) { - dev_err(kfd_device, "error setting pasid limit\n"); - return false; - } - - return true; -} - -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) -{ - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); - - if (dev) - kfd_process_iommu_unbind_callback(dev, pasid); -} - -/* - * This function called by IOMMU driver on PPR failure - */ -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, - unsigned long address, u16 flags) -{ - struct kfd_dev *dev; - - dev_warn(kfd_device, - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", - PCI_BUS_NUM(pdev->devfn), - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), - pasid, - address, - flags); - - dev = kfd_device_by_pci_dev(pdev); - if (!WARN_ON(!dev)) - kfd_signal_iommu_event(dev, pasid, address, - flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); - - return AMD_IOMMU_INV_PRI_RSP_INVALID; -} - static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { @@ -304,11 +409,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; + if (kfd_iommu_device_init(kfd)) { + dev_err(kfd_device, "Error initializing iommuv2\n"); + goto device_iommu_error; } kfd_cwsr_init(kfd); @@ -323,12 +426,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", - sched_policy); + kfd->dqm->sched_policy); goto out; kfd_resume_error: -device_iommu_pasid_error: +device_iommu_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); @@ -367,40 +470,45 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) if (!kfd->init_complete) return; - kfd->dqm->ops.stop(kfd->dqm); + /* For first KFD device suspend all the KFD processes */ + if (atomic_inc_return(&kfd_device_suspended) == 1) + kfd_suspend_all_processes(); - kfd_unbind_processes_from_device(kfd); + kfd->dqm->ops.stop(kfd->dqm); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + kfd_iommu_suspend(kfd); } int kgd2kfd_resume(struct kfd_dev *kfd) { + int ret, count; + if (!kfd->init_complete) return 0; - return kfd_resume(kfd); + ret = kfd_resume(kfd); + if (ret) + return ret; + + count = atomic_dec_return(&kfd_device_suspended); + WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); + if (count == 0) + ret = kfd_resume_all_processes(); + return ret; } static int kfd_resume(struct kfd_dev *kfd) { int err = 0; - unsigned int pasid_limit = kfd_get_pasid_limit(); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err) - return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, - iommu_invalid_ppr_cb); - - err = kfd_bind_processes_to_device(kfd); - if (err) - goto processes_bind_error; + err = kfd_iommu_resume(kfd); + if (err) { + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + return err; + } err = kfd->dqm->ops.start(kfd->dqm); if (err) { @@ -413,9 +521,7 @@ static int kfd_resume(struct kfd_dev *kfd) return err; dqm_start_error: -processes_bind_error: - amd_iommu_free_device(kfd->pdev); - + kfd_iommu_suspend(kfd); return err; } @@ -435,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will + * prepare for safe eviction of KFD BOs that belong to the specified + * process. + * + * @mm: mm_struct that identifies the specified KFD process + * @fence: eviction fence attached to KFD process BOs + * + */ +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence) +{ + struct kfd_process *p; + unsigned long active_time; + unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); + + if (!fence) + return -EINVAL; + + if (dma_fence_is_signaled(fence)) + return 0; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ENODEV; + + if (fence->seqno == p->last_eviction_seqno) + goto out; + + p->last_eviction_seqno = fence->seqno; + + /* Avoid KFD process starvation. Wait for at least + * PROCESS_ACTIVE_TIME_MS before evicting the process again + */ + active_time = get_jiffies_64() - p->last_restore_timestamp; + if (delay_jiffies > active_time) + delay_jiffies -= active_time; + else + delay_jiffies = 0; + + /* During process initialization eviction_work.dwork is initialized + * to kfd_evict_bo_worker + */ + schedule_delayed_work(&p->eviction_work, delay_jiffies); +out: + kfd_unref_process(p); + return 0; +} + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { |