diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/Kconfig | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/Makefile | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 127 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 357 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_iommu.h | 78 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 138 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 |
11 files changed, 495 insertions, 265 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index bc5a2945bd2b..ed2f06c9f346 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,7 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && X86_64 + imply AMD_IOMMU_V2 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a317e76ffb5e..0d0242240c47 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o +ifneq ($(CONFIG_AMD_IOMMU_V2),) +amdkfd-y += kfd_iommu.o +endif + amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2bc2816767a7..7493f47e7fe1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -22,10 +22,10 @@ #include <linux/pci.h> #include <linux/acpi.h> -#include <linux/amd-iommu.h> #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" +#include "kfd_iommu.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. @@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct crat_subtype_generic *sub_type_hdr; struct crat_subtype_computeunit *cu; struct kfd_cu_info cu_info; - struct amd_iommu_device_info iommu_info; int avail_size = *size; uint32_t total_num_of_cu; int num_of_cache_entries = 0; int cache_mem_filled = 0; int ret = 0; - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; struct kfd_local_mem_info local_mem_info; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) @@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ - iommu_info.flags = 0; - if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { - if ((iommu_info.flags & required_iommu_flags) == - required_iommu_flags) - cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; - } + if (!kfd_iommu_check_device(kdev)) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; crat_table->length += sub_type_hdr->length; crat_table->total_entries++; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 83d6f410890e..4ac2d61b65d5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include <linux/amd-iommu.h> +#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> @@ -28,9 +30,11 @@ #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler_gfx8.asm" +#include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 +#ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, @@ -41,6 +45,7 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = true, .needs_pci_atomics = false, }; @@ -54,8 +59,10 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = true, .needs_pci_atomics = false, }; +#endif static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, @@ -67,6 +74,7 @@ static const struct kfd_device_info hawaii_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -79,6 +87,7 @@ static const struct kfd_device_info tonga_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -91,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -103,6 +113,7 @@ static const struct kfd_device_info fiji_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -115,6 +126,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -128,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -140,6 +153,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -152,6 +166,7 @@ static const struct kfd_device_info polaris11_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -162,6 +177,7 @@ struct kfd_deviceid { }; static const struct kfd_deviceid supported_devices[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 { 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */ { 0x1306, &kaveri_device_info }, /* Kaveri */ @@ -189,6 +205,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ +#endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ { 0x67A2, &hawaii_device_info }, /* Hawaii */ @@ -302,77 +319,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return kfd; } -static bool device_iommu_pasid_init(struct kfd_dev *kfd) -{ - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - - struct amd_iommu_device_info iommu_info; - unsigned int pasid_limit; - int err; - - err = amd_iommu_device_info(kfd->pdev, &iommu_info); - if (err < 0) { - dev_err(kfd_device, - "error getting iommu info. is the iommu enabled?\n"); - return false; - } - - if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) - != 0); - return false; - } - - pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), - iommu_info.max_pasids); - - if (!kfd_set_pasid_limit(pasid_limit)) { - dev_err(kfd_device, "error setting pasid limit\n"); - return false; - } - - return true; -} - -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) -{ - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); - - if (dev) - kfd_process_iommu_unbind_callback(dev, pasid); -} - -/* - * This function called by IOMMU driver on PPR failure - */ -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, - unsigned long address, u16 flags) -{ - struct kfd_dev *dev; - - dev_warn(kfd_device, - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", - PCI_BUS_NUM(pdev->devfn), - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), - pasid, - address, - flags); - - dev = kfd_device_by_pci_dev(pdev); - if (!WARN_ON(!dev)) - kfd_signal_iommu_event(dev, pasid, address, - flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); - - return AMD_IOMMU_INV_PRI_RSP_INVALID; -} - static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { @@ -462,11 +408,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; + if (kfd_iommu_device_init(kfd)) { + dev_err(kfd_device, "Error initializing iommuv2\n"); + goto device_iommu_error; } kfd_cwsr_init(kfd); @@ -486,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto out; kfd_resume_error: -device_iommu_pasid_error: +device_iommu_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); @@ -527,11 +471,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) kfd->dqm->ops.stop(kfd->dqm); - kfd_unbind_processes_from_device(kfd); - - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + kfd_iommu_suspend(kfd); } int kgd2kfd_resume(struct kfd_dev *kfd) @@ -546,19 +486,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) static int kfd_resume(struct kfd_dev *kfd) { int err = 0; - unsigned int pasid_limit = kfd_get_pasid_limit(); - - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err) - return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, - iommu_invalid_ppr_cb); - err = kfd_bind_processes_to_device(kfd); - if (err) - goto processes_bind_error; + err = kfd_iommu_resume(kfd); + if (err) { + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + return err; + } err = kfd->dqm->ops.start(kfd->dqm); if (err) { @@ -571,9 +506,7 @@ static int kfd_resume(struct kfd_dev *kfd) return err; dqm_start_error: -processes_bind_error: - amd_iommu_free_device(kfd->pdev); - + kfd_iommu_suspend(kfd); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 93aae5c1e78b..6fb9c0d46d63 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -30,6 +30,7 @@ #include <linux/memory.h> #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_iommu.h" #include <linux/device.h> /* @@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, } } +#ifdef KFD_SUPPORT_IOMMU_V2 void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, unsigned long address, bool is_write_requested, bool is_execute_requested) @@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, mutex_unlock(&p->event_mutex); kfd_unref_process(p); } +#endif /* KFD_SUPPORT_IOMMU_V2 */ void kfd_signal_hw_exception_event(unsigned int pasid) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c new file mode 100644 index 000000000000..c71817963eea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -0,0 +1,357 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/printk.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/amd-iommu.h> +#include "kfd_priv.h" +#include "kfd_dbgmgr.h" +#include "kfd_topology.h" +#include "kfd_iommu.h" + +static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + +/** kfd_iommu_check_device - Check whether IOMMU is available for device + */ +int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + int err; + + if (!kfd->device_info->needs_iommu_device) + return -ENODEV; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err) + return err; + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) + return -ENODEV; + + return 0; +} + +/** kfd_iommu_device_init - Initialize IOMMU for device + */ +int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err < 0) { + dev_err(kfd_device, + "error getting iommu info. is the iommu enabled?\n"); + return -ENODEV; + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { + dev_err(kfd_device, + "error required iommu flags ats %i, pri %i, pasid %i\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) + != 0); + return -ENODEV; + } + + pasid_limit = min_t(unsigned int, + (unsigned int)(1 << kfd->device_info->max_pasid_bits), + iommu_info.max_pasids); + + if (!kfd_set_pasid_limit(pasid_limit)) { + dev_err(kfd_device, "error setting pasid limit\n"); + return -EBUSY; + } + + return 0; +} + +/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process + * + * Binds the given process to the given device using its PASID. This + * enables IOMMUv2 address translation for the process on the device. + * + * This function assumes that the process mutex is held. + */ +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + struct kfd_process *p = pdd->process; + int err; + + if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND) + return 0; + + if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); + return -EINVAL; + } + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + if (!err) + pdd->bound = PDD_BOUND; + + return err; +} + +/** kfd_iommu_unbind_process - Unbind process from all devices + * + * This removes all IOMMU device bindings of the process. To be used + * before process termination. + */ +void kfd_iommu_unbind_process(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + if (pdd->bound == PDD_BOUND) + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); +} + +/* Callback for process shutdown invoked by the IOMMU driver */ +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) +{ + struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); + struct kfd_process *p; + struct kfd_process_device *pdd; + + if (!dev) + return; + + /* + * Look for the process that matches the pasid. If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ + p = kfd_lookup_process_by_pasid(pasid); + if (!p) + return; + + pr_debug("Unbinding process %d from IOMMU\n", pasid); + + mutex_lock(kfd_get_dbgmgr_mutex()); + + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. + */ + kfd_process_dequeue_from_device(pdd); + + mutex_unlock(&p->mutex); + + kfd_unref_process(p); +} + +/* This function called by IOMMU driver on PPR failure */ +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + unsigned long address, u16 flags) +{ + struct kfd_dev *dev; + + dev_warn(kfd_device, + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + PCI_BUS_NUM(pdev->devfn), + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pasid, + address, + flags); + + dev = kfd_device_by_pci_dev(pdev); + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +} + +/* + * Bind processes do the device that have been temporarily unbound + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. + */ +static int kfd_bind_processes_to_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + int err = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { + mutex_unlock(&p->mutex); + continue; + } + + err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, + p->lead_thread); + if (err < 0) { + pr_err("Unexpected pasid %d binding failure\n", + p->pasid); + mutex_unlock(&p->mutex); + break; + } + + pdd->bound = PDD_BOUND; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return err; +} + +/* + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These + * processes will be restored to PDD_BOUND state in + * kfd_bind_processes_to_device. + */ +static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd)) { + mutex_unlock(&p->mutex); + continue; + } + + if (pdd->bound == PDD_BOUND) + pdd->bound = PDD_BOUND_SUSPENDED; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +/** kfd_iommu_suspend - Prepare IOMMU for suspend + * + * This unbinds processes from the device and disables the IOMMU for + * the device. + */ +void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + if (!kfd->device_info->needs_iommu_device) + return; + + kfd_unbind_processes_from_device(kfd); + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); +} + +/** kfd_iommu_resume - Restore IOMMU after resume + * + * This reinitializes the IOMMU for the device and re-binds previously + * suspended processes to the device. + */ +int kfd_iommu_resume(struct kfd_dev *kfd) +{ + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + pasid_limit = kfd_get_pasid_limit(); + + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err) + return -ENXIO; + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, + iommu_invalid_ppr_cb); + + err = kfd_bind_processes_to_device(kfd); + if (err) { + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); + return err; + } + + return 0; +} + +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); + +/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology + */ +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + struct kfd_perf_properties *props; + + if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT)) + return 0; + + if (!amd_iommu_pc_supported()) + return 0; + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + strcpy(props->block_name, "iommu"); + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ + list_add_tail(&props->list, &kdev->perf_props); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h new file mode 100644 index 000000000000..dd23d9fdf6a8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h @@ -0,0 +1,78 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KFD_IOMMU_H__ +#define __KFD_IOMMU_H__ + +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + +#define KFD_SUPPORT_IOMMU_V2 + +int kfd_iommu_check_device(struct kfd_dev *kfd); +int kfd_iommu_device_init(struct kfd_dev *kfd); + +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd); +void kfd_iommu_unbind_process(struct kfd_process *p); + +void kfd_iommu_suspend(struct kfd_dev *kfd); +int kfd_iommu_resume(struct kfd_dev *kfd); + +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev); + +#else + +static inline int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + return -ENODEV; +} +static inline int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_bind_process_to_device( + struct kfd_process_device *pdd) +{ + return 0; +} +static inline void kfd_iommu_unbind_process(struct kfd_process *p) +{ + /* empty */ +} + +static inline void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + /* empty */ +} +static inline int kfd_iommu_resume(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + return 0; +} + +#endif /* defined(CONFIG_AMD_IOMMU_V2) */ + +#endif /* __KFD_IOMMU_H__ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 594f85355397..f12eb5d98be8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -158,6 +158,7 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool supports_cwsr; + bool needs_iommu_device; bool needs_pci_atomics; }; @@ -517,15 +518,15 @@ struct kfd_process_device { uint64_t scratch_base; uint64_t scratch_limit; - /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ - enum kfd_pdd_bound bound; - /* Flag used to tell the pdd has dequeued from the dqm. * This is used to prevent dev->dqm->ops.process_termination() from * being called twice when it is already called in IOMMU callback * function. */ bool already_dequeued; + + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ + enum kfd_pdd_bound bound; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -590,6 +591,10 @@ struct kfd_process { bool signal_event_limit_reached; }; +#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ +extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +extern struct srcu_struct kfd_processes_srcu; + /** * Ioctl function type. * @@ -617,9 +622,6 @@ void kfd_unref_process(struct kfd_process *p); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); -int kfd_bind_processes_to_device(struct kfd_dev *dev); -void kfd_unbind_processes_from_device(struct kfd_dev *dev); -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4ff5f0fe6db8..e9aee76ceba9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,16 +35,16 @@ struct mm_struct; #include "kfd_priv.h" #include "kfd_dbgmgr.h" +#include "kfd_iommu.h" /* * List of struct kfd_process (field kfd_process). * Unique/indexed by mm_struct* */ -#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ -static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); static DEFINE_MUTEX(kfd_processes_mutex); -DEFINE_STATIC_SRCU(kfd_processes_srcu); +DEFINE_SRCU(kfd_processes_srcu); static struct workqueue_struct *kfd_process_wq; @@ -173,14 +173,8 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - struct kfd_process_device *pdd; - pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); - - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - if (pdd->bound == PDD_BOUND) - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - } + kfd_iommu_unbind_process(p); kfd_process_destroy_pdds(p); @@ -429,133 +423,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (pdd->bound == PDD_BOUND) { - return pdd; - } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { - pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); - return ERR_PTR(-EINVAL); - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); - if (err < 0) + err = kfd_iommu_bind_process_to_device(pdd); + if (err) return ERR_PTR(err); - pdd->bound = PDD_BOUND; - return pdd; } -/* - * Bind processes do the device that have been temporarily unbound - * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. - */ -int kfd_bind_processes_to_device(struct kfd_dev *dev) -{ - struct kfd_process_device *pdd; - struct kfd_process *p; - unsigned int temp; - int err = 0; - - int idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { - mutex_unlock(&p->mutex); - continue; - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, - p->lead_thread); - if (err < 0) { - pr_err("Unexpected pasid %d binding failure\n", - p->pasid); - mutex_unlock(&p->mutex); - break; - } - - pdd->bound = PDD_BOUND; - mutex_unlock(&p->mutex); - } - - srcu_read_unlock(&kfd_processes_srcu, idx); - - return err; -} - -/* - * Mark currently bound processes as PDD_BOUND_SUSPENDED. These - * processes will be restored to PDD_BOUND state in - * kfd_bind_processes_to_device. - */ -void kfd_unbind_processes_from_device(struct kfd_dev *dev) -{ - struct kfd_process_device *pdd; - struct kfd_process *p; - unsigned int temp; - - int idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd)) { - mutex_unlock(&p->mutex); - continue; - } - - if (pdd->bound == PDD_BOUND) - pdd->bound = PDD_BOUND_SUSPENDED; - mutex_unlock(&p->mutex); - } - - srcu_read_unlock(&kfd_processes_srcu, idx); -} - -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) -{ - struct kfd_process *p; - struct kfd_process_device *pdd; - - /* - * Look for the process that matches the pasid. If there is no such - * process, we either released it in amdkfd's own notifier, or there - * is a bug. Unfortunately, there is no way to tell... - */ - p = kfd_lookup_process_by_pasid(pasid); - if (!p) - return; - - pr_debug("Unbinding process %d from IOMMU\n", pasid); - - mutex_lock(kfd_get_dbgmgr_mutex()); - - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - mutex_lock(&p->mutex); - - pdd = kfd_get_process_device_data(dev, p); - if (pdd) - /* For GPU relying on IOMMU, we need to dequeue here - * when PASID is still bound. - */ - kfd_process_dequeue_from_device(pdd); - - mutex_unlock(&p->mutex); - - kfd_unref_process(p); -} - struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 7783250e1c6d..250615535563 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -35,6 +35,7 @@ #include "kfd_crat.h" #include "kfd_topology.h" #include "kfd_device_queue_manager.h" +#include "kfd_iommu.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm, */ static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) { - struct kfd_perf_properties *props; - - if (amd_iommu_pc_supported()) { - props = kfd_alloc_struct(props); - if (!props) - return -ENOMEM; - strcpy(props->block_name, "iommu"); - props->max_concurrent = amd_iommu_pc_get_max_banks(0) * - amd_iommu_pc_get_max_counters(0); /* assume one iommu */ - list_add_tail(&props->list, &kdev->perf_props); - } - - return 0; + /* These are the only counters supported so far */ + return kfd_iommu_add_perf_counters(kdev); } /* kfd_add_non_crat_information - Add information that is not currently diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 53fca1f45401..c0be2be6dca5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,7 +25,7 @@ #include <linux/types.h> #include <linux/list.h> -#include "kfd_priv.h" +#include "kfd_crat.h" #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 @@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device( struct list_head *device_list); void kfd_release_topology_device_list(struct list_head *device_list); -extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); - #endif /* __KFD_TOPOLOGY_H__ */ |