From a45946abb8991e17c39326854ed1314d20742ca6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 11 Mar 2010 14:04:08 -0800 Subject: intel-iommu: use for_each_set_bit() Replace open-coded loop with for_each_set_bit(). Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 417312528ddf..a0ac7197ffdd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -491,13 +491,11 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) domain->iommu_coherency = 1; - i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); - for (; i < g_num_of_iommus; ) { + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { if (!ecap_coherent(g_iommus[i]->ecap)) { domain->iommu_coherency = 0; break; } - i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); } } @@ -507,13 +505,11 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) domain->iommu_snooping = 1; - i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); - for (; i < g_num_of_iommus; ) { + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { if (!ecap_sc_support(g_iommus[i]->ecap)) { domain->iommu_snooping = 0; break; } - i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); } } @@ -1194,8 +1190,7 @@ void free_dmar_iommu(struct intel_iommu *iommu) unsigned long flags; if ((iommu->domains) && (iommu->domain_ids)) { - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); - for (; i < cap_ndoms(iommu->cap); ) { + for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); @@ -1207,9 +1202,6 @@ void free_dmar_iommu(struct intel_iommu *iommu) domain_exit(domain); } spin_unlock_irqrestore(&domain->iommu_lock, flags); - - 
i = find_next_bit(iommu->domain_ids, - cap_ndoms(iommu->cap), i+1); } } @@ -1292,14 +1284,11 @@ static void iommu_detach_domain(struct dmar_domain *domain, spin_lock_irqsave(&iommu->lock, flags); ndomains = cap_ndoms(iommu->cap); - num = find_first_bit(iommu->domain_ids, ndomains); - for (; num < ndomains; ) { + for_each_set_bit(num, iommu->domain_ids, ndomains) { if (iommu->domains[num] == domain) { found = 1; break; } - num = find_next_bit(iommu->domain_ids, - cap_ndoms(iommu->cap), num+1); } if (found) { @@ -1485,15 +1474,12 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, /* find an available domain id for this device in iommu */ ndomains = cap_ndoms(iommu->cap); - num = find_first_bit(iommu->domain_ids, ndomains); - for (; num < ndomains; ) { + for_each_set_bit(num, iommu->domain_ids, ndomains) { if (iommu->domains[num] == domain) { id = num; found = 1; break; } - num = find_next_bit(iommu->domain_ids, - cap_ndoms(iommu->cap), num+1); } if (found == 0) { @@ -3441,12 +3427,9 @@ static int vm_domain_min_agaw(struct dmar_domain *domain) int i; int min_agaw = domain->agaw; - i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); - for (; i < g_num_of_iommus; ) { + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { if (min_agaw > g_iommus[i]->agaw) min_agaw = g_iommus[i]->agaw; - - i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); } return min_agaw; @@ -3512,8 +3495,7 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) iommu = drhd->iommu; ndomains = cap_ndoms(iommu->cap); - i = find_first_bit(iommu->domain_ids, ndomains); - for (; i < ndomains; ) { + for_each_set_bit(i, iommu->domain_ids, ndomains) { if (iommu->domains[i] == domain) { spin_lock_irqsave(&iommu->lock, flags); clear_bit(i, iommu->domain_ids); @@ -3521,7 +3503,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) spin_unlock_irqrestore(&iommu->lock, flags); break; } - i = find_next_bit(iommu->domain_ids, ndomains, i+1); } } 
} -- cgit v1.2.3 From 8bdd77dd4ef99292f3d705c4c389c12f55641133 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 1 Apr 2010 13:24:35 +0300 Subject: intel-iommu mistakenly uses offset_pfn when caching mode is enabled intel_map_sg used offset_pfn, which was set to zero, when invalidating the IOTLB. intel_map_sg now uses the size variable instead. Signed-off-by: Nadav Amit Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a0ac7197ffdd..341da41cde8b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2860,7 +2860,6 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne struct dmar_domain *domain; size_t size = 0; int prot = 0; - size_t offset_pfn = 0; struct iova *iova = NULL; int ret; struct scatterlist *sg; @@ -2914,7 +2913,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn, size); else iommu_flush_write_buffer(iommu); -- cgit v1.2.3 From 82653633b6161cdecc011d15bc9df1c7489bd9a2 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 1 Apr 2010 13:24:40 +0300 Subject: intel-iommu: Use correct domain ID when caching mode is enabled In caching mode, mappings of pages (changes from non-present to present) require invalidation. Currently, this IOTLB flush is performed with a domain ID of zero. This is not according to the VT-d spec and causes big problems for emulating software. This patch uses the correct domain ID in IOTLB flushes. Device IOTLB invalidation is performed only on present to non-present changes. This decision is now based on an explicit parameter instead of a zero domain ID. 
Signed-off-by: Nadav Amit Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 341da41cde8b..1880ee06d701 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1064,7 +1064,7 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - unsigned long pfn, unsigned int pages) + unsigned long pfn, unsigned int pages, int map) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; @@ -1085,10 +1085,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, DMA_TLB_PSI_FLUSH); /* - * In caching mode, domain ID 0 is reserved for non-present to present - * mapping flush. Device IOTLB doesn't need to be flushed in this case. + * In caching mode, changes of pages from non-present to present require + * flush. However, device IOTLB doesn't need to be flushed in this case. */ - if (!cap_caching_mode(iommu->cap) || did) + if (!cap_caching_mode(iommu->cap) || !map) iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } @@ -1544,7 +1544,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH); + iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH); } else { iommu_flush_write_buffer(iommu); } @@ -2607,7 +2607,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, /* it's a non-present to present mapping. 
Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); + iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); else iommu_flush_write_buffer(iommu); @@ -2736,7 +2736,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1); + last_pfn - start_pfn + 1, 0); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2826,7 +2826,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1); + last_pfn - start_pfn + 1, 0); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2913,7 +2913,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_vpfn, size); + iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); else iommu_flush_write_buffer(iommu); -- cgit v1.2.3 From 78d5f0f500e6ba8f6cfd0673475ff4d941d705a2 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 8 Apr 2010 23:00:41 +0300 Subject: intel-iommu: Avoid global flushes with caching mode. While it may be efficient on real hardware, emulation of global invalidations is very expensive as all shadow entries must be examined. This patch changes the behaviour when caching mode is enabled (which is the case when IOMMU emulation takes place). In this case, page specific invalidation is used instead. 
Signed-off-by: Nadav Amit Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1880ee06d701..9ce79b1bae83 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2647,15 +2647,24 @@ static void flush_unmaps(void) if (!deferred_flush[i].next) continue; - iommu->flush.flush_iotlb(iommu, 0, 0, 0, + /* In caching mode, global flushes turn emulation expensive */ + if (!cap_caching_mode(iommu->cap)) + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); for (j = 0; j < deferred_flush[i].next; j++) { unsigned long mask; struct iova *iova = deferred_flush[i].iova[j]; - - mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); - iommu_flush_dev_iotlb(deferred_flush[i].domain[j], - (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); + struct dmar_domain *domain = deferred_flush[i].domain[j]; + + /* On real hardware multiple invalidations are expensive */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain->id, + iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); + else { + mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); + } __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; -- cgit v1.2.3 From 680a7524622356f5476e8fad2fe32b2b68b432c0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 8 Apr 2010 19:58:23 +0100 Subject: intel-iommu: Print out iommu seq_id more info on system with more than one IOMMU Signed-off-by: Yinghai Lu Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 3 ++- drivers/pci/intel-iommu.c | 9 ++++++--- drivers/pci/intr_remapping.c | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/dmar.c 
b/drivers/pci/dmar.c index a04bde9bd10f..d439917f37a9 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -806,7 +806,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) } ver = readl(iommu->reg + DMAR_VER_REG); - pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", + iommu->seq_id, (unsigned long long)drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), (unsigned long long)iommu->cap, diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9ce79b1bae83..da40f0789739 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1150,7 +1150,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) unsigned long nlongs; ndomains = cap_ndoms(iommu->cap); - pr_debug("Number of Domains supportd <%ld>\n", ndomains); + pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id, + ndomains); nlongs = BITS_TO_LONGS(ndomains); spin_lock_init(&iommu->lock); @@ -2319,14 +2320,16 @@ int __init init_dmars(void) */ iommu->flush.flush_context = __iommu_flush_context; iommu->flush.flush_iotlb = __iommu_flush_iotlb; - printk(KERN_INFO "IOMMU 0x%Lx: using Register based " + printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based " "invalidation\n", + iommu->seq_id, (unsigned long long)drhd->reg_base_addr); } else { iommu->flush.flush_context = qi_flush_context; iommu->flush.flush_iotlb = qi_flush_iotlb; - printk(KERN_INFO "IOMMU 0x%Lx: using Queued " + printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued " "invalidation\n", + iommu->seq_id, (unsigned long long)drhd->reg_base_addr); } } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 95b849130ad4..c13802a7e109 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -831,9 +831,9 @@ static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header, return -1; } - printk(KERN_INFO "IOAPIC id %d under DRHD base" - " 0x%Lx\n", scope->enumeration_id, - 
drhd->address); + printk(KERN_INFO "IOAPIC id %d under DRHD base " + " 0x%Lx IOMMU %d\n", scope->enumeration_id, + drhd->address, iommu->seq_id); ir_parse_one_ioapic_scope(scope, iommu); } else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) { -- cgit v1.2.3 From a99c47a228c194aa75bffdcb82806c5f33c7c63b Mon Sep 17 00:00:00 2001 From: Tom Lyon Date: Mon, 17 May 2010 08:20:45 +0100 Subject: intel-iommu: errors with smaller iommu widths When using iommu_domain_alloc with the Intel iommu, the domain address width is always initialized to 48 bits (agaw 2). This domain->agaw value is then used by pfn_to_dma_pte to (always) build a 4 level page table. However, not all systems support iommu width of 48 or 4 level page tables. In particular, the Core i5-660 and i5-670 support an address width of 36 bits (not 39!), an agaw of only 1, and only 3 level page tables. This version of the patch simply lops off extra levels of the page tables if the agaw value of the iommu is less than what is currently allocated for the domain (in intel_iommu_attach_device). If there were already allocated addresses above what the new iommu can handle, EFAULT is returned. 
Signed-off-by: Tom Lyon Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index da40f0789739..57be89e6f484 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3433,19 +3433,6 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) /* domain id for virtual machine, it won't be set in context */ static unsigned long vm_domid; -static int vm_domain_min_agaw(struct dmar_domain *domain) -{ - int i; - int min_agaw = domain->agaw; - - for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { - if (min_agaw > g_iommus[i]->agaw) - min_agaw = g_iommus[i]->agaw; - } - - return min_agaw; -} - static struct dmar_domain *iommu_alloc_vm_domain(void) { struct dmar_domain *domain; @@ -3574,7 +3561,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct pci_dev *pdev = to_pci_dev(dev); struct intel_iommu *iommu; int addr_width; - u64 end; /* normally pdev is not mapped */ if (unlikely(domain_context_mapped(pdev))) { @@ -3597,14 +3583,30 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); - end = DOMAIN_MAX_ADDR(addr_width); - end = end & VTD_PAGE_MASK; - if (end < dmar_domain->max_addr) { - printk(KERN_ERR "%s: iommu agaw (%d) is not " + if (addr_width > cap_mgaw(iommu->cap)) + addr_width = cap_mgaw(iommu->cap); + + if (dmar_domain->max_addr > (1LL << addr_width)) { + printk(KERN_ERR "%s: iommu width (%d) is not " "sufficient for the mapped address (%llx)\n", - __func__, iommu->agaw, dmar_domain->max_addr); + __func__, addr_width, dmar_domain->max_addr); return -EFAULT; } + dmar_domain->gaw = addr_width; + + /* + * Knock out extra levels of page tables if 
necessary + */ + while (iommu->agaw < dmar_domain->agaw) { + struct dma_pte *pte; + + pte = dmar_domain->pgd; + if (dma_pte_present(pte)) { + free_pgtable_page(dmar_domain->pgd); + dmar_domain->pgd = (struct dma_pte *)dma_pte_addr(pte); + } + dmar_domain->agaw--; + } return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); } -- cgit v1.2.3 From 8954da1f82a468deeeae3683252b5440e7f4ccbe Mon Sep 17 00:00:00 2001 From: Tom Lyon Date: Mon, 17 May 2010 08:19:52 +0100 Subject: intel-iommu: intel_iommu_map_range failed at very end of address space intel_iommu_map_range() doesn't allow allocation at the very end of the address space; that code has been simplified and corrected. Signed-off-by: Tom Lyon Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/pci/intel-iommu.c') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 57be89e6f484..65741dc491d6 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3626,7 +3626,6 @@ static int intel_iommu_map_range(struct iommu_domain *domain, { struct dmar_domain *dmar_domain = domain->priv; u64 max_addr; - int addr_width; int prot = 0; int ret; @@ -3639,18 +3638,14 @@ static int intel_iommu_map_range(struct iommu_domain *domain, max_addr = iova + size; if (dmar_domain->max_addr < max_addr) { - int min_agaw; u64 end; /* check if minimum agaw is sufficient for mapped address */ - min_agaw = vm_domain_min_agaw(dmar_domain); - addr_width = agaw_to_width(min_agaw); - end = DOMAIN_MAX_ADDR(addr_width); - end = end & VTD_PAGE_MASK; + end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1; if (end < max_addr) { - printk(KERN_ERR "%s: iommu agaw (%d) is not " + printk(KERN_ERR "%s: iommu width (%d) is not " "sufficient for the mapped address (%llx)\n", - __func__, min_agaw, max_addr); + __func__, dmar_domain->gaw, max_addr); return -EFAULT; } dmar_domain->max_addr = max_addr; 
-- cgit v1.2.3