diff options
Diffstat (limited to 'drivers/iommu/amd/iommu.c')
-rw-r--r-- | drivers/iommu/amd/iommu.c | 207 |
1 files changed, 121 insertions, 86 deletions
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b3a01b7757ee..aea061f26de3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -75,8 +75,6 @@ struct iommu_cmd { */ DEFINE_IDA(pdom_ids); -struct kmem_cache *amd_iommu_irq_cache; - static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev); @@ -243,7 +241,9 @@ static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { struct acpi_device *adev = ACPI_COMPANION(dev); - struct acpihid_map_entry *p; + struct acpihid_map_entry *p, *p1 = NULL; + int hid_count = 0; + bool fw_bug; if (!adev) return -ENODEV; @@ -251,12 +251,33 @@ static inline int get_acpihid_device_id(struct device *dev, list_for_each_entry(p, &acpihid_map, list) { if (acpi_dev_hid_uid_match(adev, p->hid, p->uid[0] ? p->uid : NULL)) { - if (entry) - *entry = p; - return p->devid; + p1 = p; + fw_bug = false; + hid_count = 1; + break; + } + + /* + * Count HID matches w/o UID, raise FW_BUG but allow exactly one match + */ + if (acpi_dev_hid_match(adev, p->hid)) { + p1 = p; + hid_count++; + fw_bug = true; } } - return -EINVAL; + + if (!p1) + return -EINVAL; + if (fw_bug) + dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n", + hid_count, hid_count > 1 ? "s" : ""); + if (hid_count > 1) + return -EINVAL; + if (entry) + *entry = p1; + + return p1->devid; } static inline int get_device_sbdf_id(struct device *dev) @@ -613,8 +634,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } @@ -868,7 +889,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) int type, devid, flags, tag; volatile u32 *event = __evt; int count = 0; - u64 address; + u64 address, ctrl; u32 pasid; retry: @@ -878,6 +899,7 @@ retry: (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); if (type == 0) { /* Did we hit the erratum? */ @@ -899,6 +921,7 @@ retry: dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); + dev_err(dev, "Control Reg : 0x%llx\n", ctrl); dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: @@ -982,6 +1005,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) { iommu_ga_log_notifier = notifier; + /* + * Ensure all in-flight IRQ handlers run to completion before returning + * to the caller, e.g. to ensure module code isn't unloaded while it's + * being executed in the IRQ handler. + */ + if (!notifier) + synchronize_rcu(); + return 0; } EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); @@ -1812,7 +1843,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); - iommu_free_page(ptr); + iommu_free_pages(ptr); } } @@ -1845,7 +1876,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) /* Free per device domain ID */ pdom_id_free(gcr3_info->domid); - iommu_free_page(gcr3_info->gcr3_tbl); + iommu_free_pages(gcr3_info->gcr3_tbl); gcr3_info->gcr3_tbl = NULL; } @@ -1884,7 +1915,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, return -ENOSPC; gcr3_info->domid = domid; - gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); + gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K); if (gcr3_info->gcr3_tbl == NULL) { pdom_id_free(domid); return -ENOMEM; @@ -2394,8 +2425,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) } out_err: + iommu_completion_wait(iommu); + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K; + else + dev_data->max_irqs = MAX_IRQS_PER_TABLE_512; + if (dev_is_pci(dev)) pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT); @@ -2432,15 +2469,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -void protection_domain_free(struct protection_domain *domain) -{ - WARN_ON(!list_empty(&domain->dev_list)); - if (domain->domain.type & __IOMMU_DOMAIN_PAGING) - free_io_pgtable_ops(&domain->iop.pgtbl.ops); - pdom_id_free(domain->id); - kfree(domain); -} - static void protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); @@ -2498,8 +2526,21 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable) if (pgtable == PD_MODE_V1) return ~0ULL; - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) @@ -2578,7 +2619,11 @@ void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - protection_domain_free(domain); + WARN_ON(!list_empty(&domain->dev_list)); + if (domain->domain.type & __IOMMU_DOMAIN_PAGING) + free_io_pgtable_ops(&domain->iop.pgtbl.ops); + pdom_id_free(domain->id); + kfree(domain); } static int blocked_domain_attach_device(struct iommu_domain *domain, @@ -2907,6 +2952,9 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + if (amd_iommu_ht_range_ignore()) + return; + region = iommu_alloc_resv_region(HT_RANGE_START, HT_RANGE_END - HT_RANGE_START + 1, 0, IOMMU_RESV_RESERVED, GFP_KERNEL); @@ -2983,38 +3031,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; -static int amd_iommu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - -static int amd_iommu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .blocked_domain = &blocked_domain, @@ -3028,8 +3044,6 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .def_domain_type = amd_iommu_def_domain_type, - .dev_enable_feat = amd_iommu_dev_enable_feature, - .dev_disable_feat = amd_iommu_dev_disable_feature, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, @@ -3081,6 +3095,13 @@ out: raw_spin_unlock_irqrestore(&iommu->lock, flags); } +static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data) +{ + if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K) + return DTE_INTTABLEN_2K; + return DTE_INTTABLEN_512; +} + static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { @@ -3095,7 +3116,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, new &= ~DTE_IRQ_PHYS_ADDR_MASK; new |= iommu_virt_to_phys(table->table); new |= DTE_IRQ_REMAP_INTCTL; - new |= DTE_INTTABLEN; + new |= iommu_get_int_tablen(dev_data); new |= DTE_IRQ_REMAP_ENABLE; WRITE_ONCE(dte->data[2], new); @@ -3121,7 +3142,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) return table; } -static struct irq_remap_table *__alloc_irq_table(void) +static struct irq_remap_table *__alloc_irq_table(int nid, size_t size) { struct irq_remap_table *table; @@ -3129,19 +3150,14 @@ static struct irq_remap_table *__alloc_irq_table(void) if (!table) return NULL; - table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); + table->table = iommu_alloc_pages_node_sz( + nid, GFP_KERNEL, max(DTE_INTTAB_ALIGNMENT, size)); if (!table->table) { kfree(table); return NULL; } raw_spin_lock_init(&table->lock); - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - memset(table->table, 0, - MAX_IRQS_PER_TABLE * sizeof(u32)); - else - memset(table->table, 0, - (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); return table; } @@ -3173,13 +3189,23 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, return 0; } +static inline size_t get_irq_table_size(unsigned int max_irqs) +{ + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + return max_irqs * sizeof(u32); + + return max_irqs * (sizeof(u64) * 2); +} + static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, - u16 devid, struct pci_dev *pdev) + u16 devid, struct pci_dev *pdev, + unsigned int max_irqs) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; struct amd_iommu_pci_seg *pci_seg; unsigned long flags; + int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); @@ -3198,7 +3224,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, spin_unlock_irqrestore(&iommu_table_lock, flags); /* Nothing there yet, allocate new irq remapping table */ - new_table = __alloc_irq_table(); + new_table = __alloc_irq_table(nid, get_irq_table_size(max_irqs)); if (!new_table) return NULL; @@ -3233,20 +3259,21 @@ out_unlock: spin_unlock_irqrestore(&iommu_table_lock, flags); if (new_table) { - kmem_cache_free(amd_iommu_irq_cache, new_table->table); + iommu_free_pages(new_table->table); kfree(new_table); } return table; } static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, - bool align, struct pci_dev *pdev) + bool align, struct pci_dev *pdev, + unsigned long max_irqs) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - table = alloc_irq_table(iommu, devid, pdev); + table = alloc_irq_table(iommu, devid, pdev, max_irqs); if (!table) return -ENODEV; @@ -3257,7 +3284,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, /* Scan table for free entries */ for (index = ALIGN(table->min_index, alignment), c = 0; - index < MAX_IRQS_PER_TABLE;) { + index < max_irqs;) { if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; } else { @@ -3527,6 +3554,14 @@ static void fill_msi_msg(struct msi_msg *msg, u32 index) msg->data = index; msg->address_lo = 0; msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + /* + * The struct msi_msg.dest_mode_logical is used to set the DM bit + * in MSI Message Address Register. For device w/ 2K int-remap support, + * this is bit must be set to 1 regardless of the actual destination + * mode, which is signified by the IRTE[DM]. + */ + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + msg->arch_addr_lo.dest_mode_logical = true; msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; } @@ -3589,6 +3624,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct amd_ir_data *data = NULL; struct amd_iommu *iommu; struct irq_cfg *cfg; + struct iommu_dev_data *dev_data; + unsigned long max_irqs; int i, ret, devid, seg, sbdf; int index; @@ -3607,6 +3644,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (!iommu) return -EINVAL; + dev_data = search_dev_data(iommu, devid); + max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); if (ret < 0) return ret; @@ -3614,7 +3654,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - table = alloc_irq_table(iommu, devid, NULL); + table = alloc_irq_table(iommu, devid, NULL, max_irqs); if (table) { if (!table->min_index) { /* @@ -3635,9 +3675,11 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); index = alloc_irq_index(iommu, devid, nr_irqs, align, - msi_desc_to_pci_dev(info->desc)); + msi_desc_to_pci_dev(info->desc), + max_irqs); } else { - index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL, + max_irqs); } if (index < 0) { @@ -3840,6 +3882,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3855,16 +3900,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. - */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); |