Diffstat (limited to 'drivers/iommu/amd/iommu.c')
-rw-r--r-- | drivers/iommu/amd/iommu.c | 275
1 file changed, 180 insertions(+), 95 deletions(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index ba9f3dbc5b94..b9cf59443843 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -18,7 +18,7 @@
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
 #include <linux/dma-direct.h>
 #include <linux/dma-iommu.h>
 #include <linux/iommu-helper.h>
@@ -28,7 +28,6 @@
 #include <linux/export.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
-#include <linux/dma-contiguous.h>
 #include <linux/irqdomain.h>
 #include <linux/percpu.h>
 #include <linux/iova.h>
@@ -486,6 +485,67 @@ static void dump_command(unsigned long phys_addr)
 		pr_err("CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
+{
+	struct iommu_dev_data *dev_data = NULL;
+	int devid, vmg_tag, flags;
+	struct pci_dev *pdev;
+	u64 spa;
+
+	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+	vmg_tag = (event[1]) & 0xFFFF;
+	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+	spa     = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8);
+
+	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+					   devid & 0xff);
+	if (pdev)
+		dev_data = dev_iommu_priv_get(&pdev->dev);
+
+	if (dev_data && __ratelimit(&dev_data->rs)) {
+		pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+			vmg_tag, spa, flags);
+	} else {
+		pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			vmg_tag, spa, flags);
+	}
+
+	if (pdev)
+		pci_dev_put(pdev);
+}
+
+static void amd_iommu_report_rmp_fault(volatile u32 *event)
+{
+	struct iommu_dev_data *dev_data = NULL;
+	int devid, flags_rmp, vmg_tag, flags;
+	struct pci_dev *pdev;
+	u64 gpa;
+
+	devid     = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
+	flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF;
+	vmg_tag   = (event[1]) & 0xFFFF;
+	flags     = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
+	gpa       = ((u64)event[3] << 32) | event[2];
+
+	pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
+					   devid & 0xff);
+	if (pdev)
+		dev_data = dev_iommu_priv_get(&pdev->dev);
+
+	if (dev_data && __ratelimit(&dev_data->rs)) {
+		pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+			vmg_tag, gpa, flags_rmp, flags);
+	} else {
+		pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			vmg_tag, gpa, flags_rmp, flags);
+	}
+
+	if (pdev)
+		pci_dev_put(pdev);
+}
+
 static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 					u64 address, int flags)
 {
@@ -513,10 +573,11 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
 	struct device *dev = iommu->iommu.dev;
-	int type, devid, pasid, flags, tag;
+	int type, devid, flags, tag;
 	volatile u32 *event = __evt;
 	int count = 0;
 	u64 address;
+	u32 pasid;
 
 retry:
 	type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
@@ -577,6 +638,12 @@
 			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 			pasid, address, flags);
 		break;
+	case EVENT_TYPE_RMP_FAULT:
+		amd_iommu_report_rmp_fault(event);
+		break;
+	case EVENT_TYPE_RMP_HW_ERR:
+		amd_iommu_report_rmp_hw_error(event);
+		break;
 	case EVENT_TYPE_INV_PPR_REQ:
 		pasid = PPR_PASID(*((u64 *)__evt));
 		tag = event[1] & 0x03FF;
@@ -729,7 +796,21 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu)
 		}
 	}
 }
-#endif /* CONFIG_IRQ_REMAP */
+
+static void
+amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu)
+{
+	if (!irq_remapping_enabled || !dev_is_pci(dev) ||
+	    pci_dev_has_special_msi_domain(to_pci_dev(dev)))
+		return;
+
+	dev_set_msi_domain(dev, iommu->msi_domain);
+}
+
+#else /* CONFIG_IRQ_REMAP */
+static inline void
+amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
+#endif /* !CONFIG_IRQ_REMAP */
 
 #define AMD_IOMMU_INT_MASK	\
 	(MMIO_STATUS_EVT_INT_MASK | \
@@ -792,11 +873,11 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 *
 ****************************************************************************/
 
-static int wait_on_sem(volatile u64 *sem)
+static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 {
 	int i = 0;
 
-	while (*sem == 0 && i < LOOP_TIMEOUT) {
+	while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
 		udelay(1);
 		i += 1;
 	}
@@ -827,16 +908,16 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
 	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
-static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+static void build_completion_wait(struct iommu_cmd *cmd,
+				  struct amd_iommu *iommu,
+				  u64 data)
 {
-	u64 paddr = iommu_virt_to_phys((void *)address);
-
-	WARN_ON(address & 0x7ULL);
+	u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem);
 
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK;
 	cmd->data[1] = upper_32_bits(paddr);
-	cmd->data[2] = 1;
+	cmd->data[2] = data;
 	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
@@ -909,7 +990,7 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 }
 
-static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid,
 				  u64 address, bool size)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -927,7 +1008,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
 	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
 }
 
-static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
 				  int qdep, u64 address, bool size)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -947,7 +1028,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
 	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
 }
 
-static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
 			       int status, int tag, bool gn)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -1045,22 +1126,21 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	struct iommu_cmd cmd;
 	unsigned long flags;
 	int ret;
+	u64 data;
 
 	if (!iommu->need_sync)
 		return 0;
 
-
-	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
-
 	raw_spin_lock_irqsave(&iommu->lock, flags);
 
-	iommu->cmd_sem = 0;
+	data = ++iommu->cmd_sem_val;
+	build_completion_wait(&cmd, iommu, data);
 
 	ret = __iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
 		goto out_unlock;
 
-	ret = wait_on_sem(&iommu->cmd_sem);
+	ret = wait_on_sem(iommu, data);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&iommu->lock, flags);
@@ -2157,6 +2237,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 		iommu_dev = ERR_PTR(ret);
 		iommu_ignore_device(dev);
 	} else {
+		amd_iommu_set_pci_msi_domain(dev, iommu);
 		iommu_dev = &iommu->iommu;
 	}
 
@@ -2659,7 +2740,12 @@ static int amd_iommu_def_domain_type(struct device *dev)
 	if (!dev_data)
 		return 0;
 
-	if (dev_data->iommu_v2)
+	/*
+	 * Do not identity map IOMMUv2 capable devices when memory encryption is
+	 * active, because some of those devices (AMD GPUs) don't have the
+	 * encryption bit in their DMA-mask and require remapping.
+	 */
+	if (!mem_encrypt_active() && dev_data->iommu_v2)
 		return IOMMU_DOMAIN_IDENTITY;
 
 	return 0;
@@ -2781,7 +2867,7 @@ out:
 }
 EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
 
-static int __flush_pasid(struct protection_domain *domain, int pasid,
+static int __flush_pasid(struct protection_domain *domain, u32 pasid,
 			 u64 address, bool size)
 {
 	struct iommu_dev_data *dev_data;
@@ -2842,13 +2928,13 @@ out:
 	return ret;
 }
 
-static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid,
 				  u64 address)
 {
 	return __flush_pasid(domain, pasid, address, false);
 }
 
-int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
 			 u64 address)
 {
 	struct protection_domain *domain = to_pdomain(dom);
@@ -2863,13 +2949,13 @@ int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
 }
 EXPORT_SYMBOL(amd_iommu_flush_page);
 
-static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
 {
 	return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 			     true);
 }
 
-int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	unsigned long flags;
@@ -2883,7 +2969,7 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
 }
 EXPORT_SYMBOL(amd_iommu_flush_tlb);
 
-static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
 {
 	int index;
 	u64 *pte;
@@ -2915,7 +3001,7 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
 	return pte;
 }
 
-static int __set_gcr3(struct protection_domain *domain, int pasid,
+static int __set_gcr3(struct protection_domain *domain, u32 pasid,
 		      unsigned long cr3)
 {
 	struct domain_pgtable pgtable;
@@ -2934,7 +3020,7 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
 	return __amd_iommu_flush_tlb(domain, pasid);
 }
 
-static int __clear_gcr3(struct protection_domain *domain, int pasid)
+static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
 {
 	struct domain_pgtable pgtable;
 	u64 *pte;
@@ -2952,7 +3038,7 @@ static int __clear_gcr3(struct protection_domain *domain, int pasid)
 	return __amd_iommu_flush_tlb(domain, pasid);
 }
 
-int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 			      unsigned long cr3)
 {
 	struct protection_domain *domain = to_pdomain(dom);
@@ -2967,7 +3053,7 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 }
 EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
 
-int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	unsigned long flags;
@@ -2981,7 +3067,7 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
 }
 EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
 
-int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
 			   int status, int tag)
 {
 	struct iommu_dev_data *dev_data;
@@ -3292,6 +3378,7 @@
 static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 			  struct amd_ir_data *data)
 {
+	bool ret;
 	struct irq_remap_table *table;
 	struct amd_iommu *iommu;
 	unsigned long flags;
@@ -3309,10 +3396,18 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 
 	entry = (struct irte_ga *)table->table;
 	entry = &entry[index];
-	entry->lo.fields_remap.valid = 0;
-	entry->hi.val = irte->hi.val;
-	entry->lo.val = irte->lo.val;
-	entry->lo.fields_remap.valid = 1;
+
+	ret = cmpxchg_double(&entry->lo.val, &entry->hi.val,
+			     entry->lo.val, entry->hi.val,
			     irte->lo.val, irte->hi.val);
+	/*
+	 * We use cmpxchg16 to atomically update the 128-bit IRTE,
+	 * and it cannot be updated by the hardware or other processors
+	 * behind us, so the return value of cmpxchg16 should be the
+	 * same as the old value.
+	 */
+	WARN_ON(!ret);
+
 	if (data)
 		data->ref = entry;
 
@@ -3505,69 +3600,51 @@ static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
 
 static int get_devid(struct irq_alloc_info *info)
 {
-	int devid = -1;
-
 	switch (info->type) {
 	case X86_IRQ_ALLOC_TYPE_IOAPIC:
-		devid = get_ioapic_devid(info->ioapic_id);
-		break;
+	case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
+		return get_ioapic_devid(info->devid);
 	case X86_IRQ_ALLOC_TYPE_HPET:
-		devid = get_hpet_devid(info->hpet_id);
-		break;
-	case X86_IRQ_ALLOC_TYPE_MSI:
-	case X86_IRQ_ALLOC_TYPE_MSIX:
-		devid = get_device_id(&info->msi_dev->dev);
-		break;
+	case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
+		return get_hpet_devid(info->devid);
+	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
+	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
+		return get_device_id(msi_desc_to_dev(info->desc));
 	default:
-		BUG_ON(1);
-		break;
+		WARN_ON_ONCE(1);
+		return -1;
 	}
-
-	return devid;
 }
 
-static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
+static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info,
+						   int devid)
 {
-	struct amd_iommu *iommu;
-	int devid;
+	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
-	if (!info)
+	if (!iommu)
 		return NULL;
 
-	devid = get_devid(info);
-	if (devid >= 0) {
-		iommu = amd_iommu_rlookup_table[devid];
-		if (iommu)
-			return iommu->ir_domain;
+	switch (info->type) {
+	case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
+	case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
+		return iommu->ir_domain;
+	default:
+		WARN_ON_ONCE(1);
+		return NULL;
 	}
-
-	return NULL;
 }
 
 static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
 {
-	struct amd_iommu *iommu;
 	int devid;
 
 	if (!info)
 		return NULL;
 
-	switch (info->type) {
-	case X86_IRQ_ALLOC_TYPE_MSI:
-	case X86_IRQ_ALLOC_TYPE_MSIX:
-		devid = get_device_id(&info->msi_dev->dev);
-		if (devid < 0)
-			return NULL;
-
-		iommu = amd_iommu_rlookup_table[devid];
-		if (iommu)
-			return iommu->msi_domain;
-		break;
-	default:
-		break;
-	}
-
-	return NULL;
+	devid = get_devid(info);
+	if (devid < 0)
+		return NULL;
+	return get_irq_domain_for_devid(info, devid);
 }
 
struct irq_remap_ops amd_iommu_irq_ops = {
@@ -3576,7 +3653,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
 	.disable		= amd_iommu_disable,
 	.reenable		= amd_iommu_reenable,
 	.enable_faulting	= amd_iommu_enable_faulting,
-	.get_ir_irq_domain	= get_ir_irq_domain,
 	.get_irq_domain		= get_irq_domain,
 };
 
@@ -3602,21 +3678,21 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,
 	switch (info->type) {
 	case X86_IRQ_ALLOC_TYPE_IOAPIC:
 		/* Setup IOAPIC entry */
-		entry = info->ioapic_entry;
-		info->ioapic_entry = NULL;
+		entry = info->ioapic.entry;
+		info->ioapic.entry = NULL;
 		memset(entry, 0, sizeof(*entry));
 		entry->vector = index;
 		entry->mask = 0;
-		entry->trigger = info->ioapic_trigger;
-		entry->polarity = info->ioapic_polarity;
+		entry->trigger = info->ioapic.trigger;
+		entry->polarity = info->ioapic.polarity;
 		/* Mask level triggered irqs. */
-		if (info->ioapic_trigger)
+		if (info->ioapic.trigger)
 			entry->mask = 1;
 		break;
 
 	case X86_IRQ_ALLOC_TYPE_HPET:
-	case X86_IRQ_ALLOC_TYPE_MSI:
-	case X86_IRQ_ALLOC_TYPE_MSIX:
+	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
+	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
 		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_lo = MSI_ADDR_BASE_LO;
 		msg->data = irte_info->index;
@@ -3660,15 +3736,15 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 
 	if (!info)
 		return -EINVAL;
-	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
-	    info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+	if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI &&
+	    info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX)
 		return -EINVAL;
 
 	/*
 	 * With IRQ remapping enabled, don't need contiguous CPU vectors
 	 * to support multiple MSI interrupts.
 	 */
-	if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+	if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI)
 		info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
 
 	devid = get_devid(info);
@@ -3696,15 +3772,16 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 					iommu->irte_ops->set_allocated(table, i);
 			}
 			WARN_ON(table->min_index != 32);
-			index = info->ioapic_pin;
+			index = info->ioapic.pin;
 		} else {
 			index = -ENOMEM;
 		}
-	} else if (info->type == X86_IRQ_ALLOC_TYPE_MSI ||
-		   info->type == X86_IRQ_ALLOC_TYPE_MSIX) {
-		bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
+	} else if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI ||
+		   info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) {
+		bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
 
-		index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev);
+		index = alloc_irq_index(devid, nr_irqs, align,
+					msi_desc_to_pci_dev(info->desc));
 	} else {
 		index = alloc_irq_index(devid, nr_irqs, false, NULL);
 	}
@@ -3717,8 +3794,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(domain, virq + i);
-		cfg = irqd_cfg(irq_data);
-		if (!irq_data || !cfg) {
+		cfg = irq_data ? irqd_cfg(irq_data) : NULL;
+		if (!cfg) {
 			ret = -EINVAL;
 			goto out_free_data;
 		}
@@ -3826,14 +3903,18 @@ int amd_iommu_activate_guest_mode(void *data)
 {
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+	u64 valid;
 
 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
 	    !entry || entry->lo.fields_vapic.guest_mode)
 		return 0;
 
+	valid = entry->lo.fields_vapic.valid;
+
 	entry->lo.val = 0;
 	entry->hi.val = 0;
 
+	entry->lo.fields_vapic.valid = valid;
 	entry->lo.fields_vapic.guest_mode = 1;
 	entry->lo.fields_vapic.ga_log_intr = 1;
 	entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
@@ -3850,14 +3931,18 @@ int amd_iommu_deactivate_guest_mode(void *data)
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
 	struct irq_cfg *cfg = ir_data->cfg;
+	u64 valid;
 
 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
 	    !entry || !entry->lo.fields_vapic.guest_mode)
 		return 0;
 
+	valid = entry->lo.fields_remap.valid;
+
 	entry->lo.val = 0;
 	entry->hi.val = 0;
 
+	entry->lo.fields_remap.valid = valid;
 	entry->lo.fields_remap.dm = apic->irq_dest_mode;
 	entry->lo.fields_remap.int_type = apic->irq_delivery_mode;
 	entry->hi.fields.vector = cfg->vector;
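
The completion-wait hunks above replace the reset-to-zero semaphore with a per-IOMMU sequence counter: each sync increments cmd_sem_val, the COMPLETION_WAIT command stores that value into the write-back semaphore, and wait_on_sem() polls until the stored value matches. Because a value is never reused, a stale write-back can no longer satisfy a newer wait, and the semaphore does not have to be cleared before the command is queued. The stand-alone C sketch below illustrates only that sequencing; the fake_iommu structure and queue_completion_wait() helper are invented for the example and simply simulate the hardware store, so it is not the kernel's code path.

#include <stdint.h>
#include <stdio.h>

#define LOOP_TIMEOUT 100000

struct fake_iommu {
	volatile uint64_t cmd_sem;	/* write-back semaphore */
	uint64_t cmd_sem_val;		/* last sequence value handed out */
};

/*
 * Stand-in for queueing a COMPLETION_WAIT command: real hardware would
 * store 'data' into the semaphore only after all earlier commands in
 * the queue have completed.
 */
static void queue_completion_wait(struct fake_iommu *iommu, uint64_t data)
{
	iommu->cmd_sem = data;
}

static int wait_on_sem(struct fake_iommu *iommu, uint64_t data)
{
	int i = 0;

	/* Poll until the "hardware" has written back our sequence value. */
	while (iommu->cmd_sem != data && i < LOOP_TIMEOUT)
		i++;

	return (i == LOOP_TIMEOUT) ? -1 : 0;
}

static int completion_wait(struct fake_iommu *iommu)
{
	/* A fresh value per sync: a stale write-back can never match. */
	uint64_t data = ++iommu->cmd_sem_val;

	queue_completion_wait(iommu, data);
	return wait_on_sem(iommu, data);
}

int main(void)
{
	struct fake_iommu iommu = { 0 };

	printf("sync #1 -> %d (sem=%llu)\n", completion_wait(&iommu),
	       (unsigned long long)iommu.cmd_sem);
	printf("sync #2 -> %d (sem=%llu)\n", completion_wait(&iommu),
	       (unsigned long long)iommu.cmd_sem);
	return 0;
}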
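The modify_irte_ga() hunk switches from a clear-valid/copy/set-valid sequence to a single cmpxchg_double(), so the 128-bit IRTE is never observed half-written or with its valid bit temporarily cleared. The sketch below shows the same 16-byte compare-and-swap idea in plain C using the GCC/Clang __atomic builtins on unsigned __int128 (may need -mcx16 or -latomic to build); it illustrates the technique only and is not the kernel's cmpxchg_double() implementation, and the irte128/update_irte_atomic names are invented for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

union irte128 {
	struct {
		uint64_t lo;
		uint64_t hi;
	};
	unsigned __int128 val;		/* forces 16-byte alignment */
};

static bool update_irte_atomic(union irte128 *entry, uint64_t lo, uint64_t hi)
{
	unsigned __int128 old = entry->val;
	union irte128 new = { .lo = lo, .hi = hi };

	/*
	 * Replace the whole 128-bit entry in one shot.  Nothing else is
	 * supposed to modify it concurrently, so a failed exchange (the
	 * old value changed under us) indicates a bug - the condition the
	 * patch flags with WARN_ON(!ret).
	 */
	return __atomic_compare_exchange_n(&entry->val, &old, new.val,
					   false, __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST);
}

int main(void)
{
	union irte128 entry = { .lo = 0x1, .hi = 0x0 };

	printf("updated: %d\n", update_irte_atomic(&entry, 0xabcd, 0x1234));
	printf("lo=%#llx hi=%#llx\n",
	       (unsigned long long)entry.lo, (unsigned long long)entry.hi);
	return 0;
}

As in the patch, a failed exchange is treated as a bug rather than retried, since the IRTE is not supposed to change underneath the updater.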