From e61d98d8dad0048619bb138b0ff996422ffae53b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:35 -0700 Subject: x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization code reorganization of the generic Intel vt-d parsing related routines and linux iommu routines specific to Intel vt-d. drivers/pci/dmar.c now contains the generic vt-d parsing related routines drivers/pci/intel_iommu.c contains the iommu routines specific to vt-d Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dma_remapping.h | 155 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/dmar.c | 90 +++++++++++++++++++++++- drivers/pci/intel-iommu.c | 92 +++---------------------- drivers/pci/intel-iommu.h | 163 ++++---------------------------------------- 4 files changed, 264 insertions(+), 236 deletions(-) create mode 100644 drivers/pci/dma_remapping.h (limited to 'drivers/pci') diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h new file mode 100644 index 000000000000..05aac8ef96c7 --- /dev/null +++ b/drivers/pci/dma_remapping.h @@ -0,0 +1,155 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f941f609dbf3..c00e387f5b75 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -19,9 +19,11 @@ * Author: Shaohua Li * Author: Anil S Keshavamurthy * - * This file implements early detection/parsing of DMA Remapping Devices + * This file implements early detection/parsing of Remapping Devices * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI * tables. + * + * These routines are used by both DMA-remapping and Interrupt-remapping */ #include @@ -300,6 +302,37 @@ parse_dmar_table(void) return ret; } +int dmar_pci_device_match(struct pci_dev *devices[], int cnt, + struct pci_dev *dev) +{ + int index; + + while (dev) { + for (index = 0; index < cnt; index++) + if (dev == devices[index]) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd = NULL; + + list_for_each_entry(drhd, &dmar_drhd_units, list) { + if (drhd->include_all || dmar_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + return drhd; + } + + return NULL; +} + int __init dmar_table_init(void) { @@ -343,3 +376,58 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } + +struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int map_size; + u32 ver; + + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = PAGE_ALIGN_4K(map_size); + if (map_size > PAGE_SIZE_4K) { + iounmap(iommu->reg); + iommu->reg = ioremap(drhd->reg_base_addr, map_size); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + } + + ver = readl(iommu->reg + DMAR_VER_REG); + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + iommu->cap, iommu->ecap); + + spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + return iommu; +error: + kfree(iommu); + return NULL; +} + +void free_iommu(struct intel_iommu *iommu) +{ + if (!iommu) + return; + +#ifdef CONFIG_DMAR + free_dmar_iommu(iommu); +#endif + + if (iommu->reg) + iounmap(iommu->reg); + kfree(iommu); +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bb0642318a95..1c0270d3e2e5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -990,6 +990,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } + spin_lock_init(&iommu->lock); + /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -998,62 +1000,15 @@ static int iommu_init_domains(struct intel_iommu *iommu) set_bit(0, iommu->domain_ids); return 0; } -static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) -{ - int ret; - int map_size; - u32 ver; - - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); - ret = iommu_init_domains(iommu); - if (ret) - goto error_unmap; - spin_lock_init(&iommu->lock); - spin_lock_init(&iommu->register_lock); - - drhd->iommu = iommu; - return iommu; -error_unmap: - iounmap(iommu->reg); -error: - kfree(iommu); - return NULL; -} static void domain_exit(struct dmar_domain *domain); -static void free_iommu(struct intel_iommu *iommu) + +void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; - if (!iommu) - return; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; @@ -1078,10 +1033,6 @@ static void free_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); - - if (iommu->reg) - iounmap(iommu->reg); - kfree(iommu); } static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) @@ -1426,37 +1377,6 @@ find_domain(struct pci_dev *pdev) return NULL; } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) -{ - int index; - - while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) - return 1; - - /* Check our parent */ - dev = dev->bus->self; - } - - return 0; -} - -static struct dmar_drhd_unit * -dmar_find_matched_drhd_unit(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd = NULL; - - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; - } - - return NULL; -} - /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { @@ -1764,6 +1684,10 @@ int __init init_dmars(void) goto error; } + ret = iommu_init_domains(iommu); + if (ret) + goto error; + /* * TBD: * we could share the same root & context tables diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index afc0ad96122e..9e5e98c76c05 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,19 +27,7 @@ #include #include "iova.h" #include - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +#include "dma_remapping.h" /* * Intel IOMMU register specification per version 1.0 public spec. @@ -187,158 +175,31 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); - struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - spinlock_t lock; /* protect context, domain ids */ spinlock_t register_lock; /* protect register handling */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ unsigned int irq; unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; +#endif }; -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); #endif -- cgit v1.2.3 From c42d9f32443397aed2d37d37df161392e6a5862f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:36 -0700 Subject: x64, x2apic/intr-remap: fix the need for sequential array allocation of iommus Clean up the intel-iommu code related to deferred iommu flush logic. There is no need to allocate all the iommu's as a sequential array. This will be used later in the interrupt-remapping patch series to allocate iommu much early and individually for each device remapping hardware unit. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 11 +++++++++-- drivers/pci/intel-iommu.c | 24 +++++++----------------- drivers/pci/intel-iommu.h | 4 ++-- 3 files changed, 18 insertions(+), 21 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index c00e387f5b75..1a59423a8eda 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -377,11 +377,18 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) +struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) { + struct intel_iommu *iommu; int map_size; u32 ver; + static int iommu_allocated = 0; + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return NULL; + + iommu->seq_id = iommu_allocated++; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1c0270d3e2e5..4d59a6a1f4dd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -58,8 +58,6 @@ static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); -static struct intel_iommu *g_iommus; - #define HIGH_WATER_MARK 250 struct deferred_flush_tables { int next; @@ -1649,8 +1647,6 @@ int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; g_num_of_iommus++; /* * lock not needed as this is only incremented in the single @@ -1659,26 +1655,17 @@ int __init init_dmars(void) */ } - g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); - if (!g_iommus) { - ret = -ENOMEM; - goto error; - } - deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } - i = 0; for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(&g_iommus[i], drhd); - i++; + iommu = alloc_iommu(drhd); if (!iommu) { ret = -ENOMEM; goto error; @@ -1770,7 +1757,6 @@ error: iommu = drhd->iommu; free_iommu(iommu); } - kfree(g_iommus); return ret; } @@ -1927,7 +1913,10 @@ static void flush_unmaps(void) /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { if (deferred_flush[i].next) { - iommu_flush_iotlb_global(&g_iommus[i], 0); + struct intel_iommu *iommu = + deferred_flush[i].domain[0]->iommu; + + iommu_flush_iotlb_global(iommu, 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -1957,7 +1946,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) if (list_size == HIGH_WATER_MARK) flush_unmaps(); - iommu_id = dom->iommu - g_iommus; + iommu_id = dom->iommu->seq_id; + next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 9e5e98c76c05..75c63f65b3f5 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -182,6 +182,7 @@ struct intel_iommu { int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -198,8 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd); +extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif -- cgit v1.2.3 From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 89 ++++++++++++++++++++++++++++++++++++++--------- drivers/pci/intel-iommu.c | 10 +++--- drivers/pci/intel-iommu.h | 2 +- include/linux/dmar.h | 10 +++++- 4 files changed, 88 insertions(+), 23 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 1a59423a8eda..158bc5bfcf75 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -174,19 +174,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; int ret = 0; - static int include_all; dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); if (!dmaru) return -ENOMEM; + dmaru->hdr = header; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru->reg_base_addr = drhd->address; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + ret = alloc_iommu(dmaru); + if (ret) { + kfree(dmaru); + return ret; + } + dmar_register_drhd_unit(dmaru); + return 0; +} + +static int __init +dmar_parse_dev(struct dmar_drhd_unit *dmaru) +{ + struct acpi_dmar_hardware_unit *drhd; + static int include_all; + int ret; + + drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; + if (!dmaru->include_all) ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + header->length, + ((void *)drhd) + drhd->header.length, &dmaru->devices_cnt, &dmaru->devices, drhd->segment); else { @@ -199,10 +217,10 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) include_all = 1; } - if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) + if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) { + list_del(&dmaru->list); kfree(dmaru); - else - dmar_register_drhd_unit(dmaru); + } return ret; } @@ -211,23 +229,35 @@ dmar_parse_one_rmrr(struct acpi_dmar_header *header) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; - int ret = 0; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) return -ENOMEM; + rmrru->hdr = header; rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; + + dmar_register_rmrr_unit(rmrru); + return 0; +} + +static int __init +rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) +{ + struct acpi_dmar_reserved_memory *rmrr; + int ret; + + rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + header->length, + ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - if (ret || (rmrru->devices_cnt == 0)) + if (ret || (rmrru->devices_cnt == 0)) { + list_del(&rmrru->list); kfree(rmrru); - else - dmar_register_rmrr_unit(rmrru); + } return ret; } @@ -333,15 +363,42 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) return NULL; } +int __init dmar_dev_scope_init(void) +{ + struct dmar_drhd_unit *drhd; + struct dmar_rmrr_unit *rmrr; + int ret = -ENODEV; + + for_each_drhd_unit(drhd) { + ret = dmar_parse_dev(drhd); + if (ret) + return ret; + } + + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } + + return ret; +} + int __init dmar_table_init(void) { - + static int dmar_table_initialized; int ret; + if (dmar_table_initialized) + return 0; + + dmar_table_initialized = 1; + ret = parse_dmar_table(); if (ret) { - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + if (ret != -ENODEV) + printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); return ret; } @@ -377,7 +434,7 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; int map_size; @@ -386,7 +443,7 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) - return NULL; + return -ENOMEM; iommu->seq_id = iommu_allocated++; @@ -419,10 +476,10 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; - return iommu; + return 0; error: kfree(iommu); - return NULL; + return -1; } void free_iommu(struct intel_iommu *iommu) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4d59a6a1f4dd..218a1f357b4d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1665,11 +1665,8 @@ int __init init_dmars(void) for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); - if (!iommu) { - ret = -ENOMEM; - goto error; - } + + iommu = drhd->iommu; ret = iommu_init_domains(iommu); if (ret) @@ -2324,6 +2321,9 @@ int __init intel_iommu_init(void) if (dmar_table_init()) return -ENODEV; + if (dmar_dev_scope_init()) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 75c63f65b3f5..371e3b9caf32 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -199,7 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); +extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b847551..3ab07e425583 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From aaa9d1dd63bf89b62f4ea9f46de376ab1a3fbc6c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:38 -0700 Subject: x64, x2apic/intr-remap: use CONFIG_DMAR for DMA-remapping specific code DMA remapping specific code covered with CONFIG_DMAR in the generic code which will also be used later for enabling Interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 158bc5bfcf75..5c99f9973987 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -39,7 +39,6 @@ * these units are not supported by the architecture. */ LIST_HEAD(dmar_drhd_units); -LIST_HEAD(dmar_rmrr_units); static struct acpi_table_header * __initdata dmar_tbl; @@ -55,11 +54,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) list_add(&drhd->list, &dmar_drhd_units); } -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, struct pci_dev **dev, u16 segment) { @@ -224,6 +218,15 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru) return ret; } +#ifdef CONFIG_DMAR +LIST_HEAD(dmar_rmrr_units); + +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + + static int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { @@ -260,6 +263,7 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } +#endif static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) @@ -284,6 +288,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) } } + /** * parse_dmar_table - parses the DMA reporting table */ @@ -316,7 +321,9 @@ parse_dmar_table(void) ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: +#ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); +#endif break; default: printk(KERN_WARNING PREFIX @@ -366,7 +373,6 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) int __init dmar_dev_scope_init(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; int ret = -ENODEV; for_each_drhd_unit(drhd) { @@ -375,11 +381,16 @@ int __init dmar_dev_scope_init(void) return ret; } - for_each_rmrr_units(rmrr) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; +#ifdef CONFIG_DMAR + { + struct dmar_rmrr_unit *rmrr; + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } } +#endif return ret; } @@ -407,10 +418,12 @@ int __init dmar_table_init(void) return -ENODEV; } +#ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) { printk(KERN_INFO PREFIX "No RMRR found\n"); return -ENODEV; } +#endif return 0; } -- cgit v1.2.3 From 2d6b5f85bb4ca919d8ab0f30311309b53fb93bc3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:39 -0700 Subject: x64, x2apic/intr-remap: Fix the need for RMRR in the DMA-remapping detection Presence of RMRR structures is not compulsory for enabling DMA-remapping. Signed-off-by: Suresh Siddha Signed-off-by: Yong Y Wang Cc: Yong Y Wang Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5c99f9973987..903f1701edff 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -419,10 +419,8 @@ int __init dmar_table_init(void) } #ifdef CONFIG_DMAR - if (list_empty(&dmar_rmrr_units)) { + if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); - return -ENODEV; - } #endif return 0; -- cgit v1.2.3 From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/Makefile | 2 ++ drivers/pci/dmar.c | 3 ++ drivers/pci/intel-iommu.h | 2 ++ drivers/pci/intr_remapping.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.h | 6 ++++ include/linux/dmar.h | 1 + 6 files changed, 84 insertions(+) create mode 100644 drivers/pci/intr_remapping.c create mode 100644 drivers/pci/intr_remapping.h (limited to 'drivers/pci') diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4d1ce2e7361e..1c409c7b0d56 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o # Build Intel IOMMU support obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o +obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 903f1701edff..127764cfbe27 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -423,6 +423,9 @@ int __init dmar_table_init(void) printk(KERN_INFO PREFIX "No RMRR found\n"); #endif +#ifdef CONFIG_INTR_REMAP + parse_ioapics_under_ir(); +#endif return 0; } diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 371e3b9caf32..eb167e39b464 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -114,6 +114,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) /* IOTLB_REG */ diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c new file mode 100644 index 000000000000..a80b87921c68 --- /dev/null +++ b/drivers/pci/intr_remapping.c @@ -0,0 +1,70 @@ +#include +#include +#include "intel-iommu.h" +#include "intr_remapping.h" + +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static int ir_ioapic_num; + +static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + void *start, *end; + + drhd = (struct acpi_dmar_hardware_unit *)header; + + start = (void *)(drhd + 1); + end = ((void *)drhd) + header->length; + + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { + if (ir_ioapic_num == MAX_IO_APICS) { + printk(KERN_WARNING "Exceeded Max IO APICS\n"); + return -1; + } + + printk(KERN_INFO "IOAPIC id %d under DRHD base" + " 0x%Lx\n", scope->enumeration_id, + drhd->address); + + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; + } + start += scope->length; + } + + return 0; +} + +/* + * Finds the assocaition between IOAPIC's and its Interrupt-remapping + * hardware unit. + */ +int __init parse_ioapics_under_ir(void) +{ + struct dmar_drhd_unit *drhd; + int ir_supported = 0; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (ecap_ir_support(iommu->ecap)) { + if (ir_parse_ioapic_scope(drhd->hdr, iommu)) + return -1; + + ir_supported = 1; + } + } + + if (ir_supported && ir_ioapic_num != nr_ioapics) { + printk(KERN_WARNING + "Not all IO-APIC's listed under remapping hardware\n"); + return -1; + } + + return ir_supported; +} diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h new file mode 100644 index 000000000000..c4a40b2f33fa --- /dev/null +++ b/drivers/pci/intr_remapping.h @@ -0,0 +1,6 @@ +#include "intel-iommu.h" + +struct ioapic_scope { + struct intel_iommu *iommu; + unsigned int id; +}; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e425583..c4e96eb29617 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; -- cgit v1.2.3 From cf1337f0447e5be8e66daa944f0ea3bcac2b6179 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:41 -0700 Subject: x64, x2apic/intr-remap: move IOMMU_WAIT_OP() macro to intel-iommu.h move IOMMU_WAIT_OP() macro to header file. This will be used by both DMA-remapping and Intr-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.c | 15 --------------- drivers/pci/intel-iommu.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 218a1f357b4d..fb701d9dd8c0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -49,8 +49,6 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) @@ -486,19 +484,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) return 0; } -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} - static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index eb167e39b464..3a650e8cba33 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -177,6 +177,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; -- cgit v1.2.3 From fe962e90cb17a8426e144dee970e77ed789d98ee Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:42 -0700 Subject: x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d) Queued invalidation (part of Intel Virtualization Technology for Directed I/O architecture) infrastructure. This will be used for invalidating the interrupt entry cache in the case of Interrupt-remapping and IOTLB invalidation in the case of DMA-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intel-iommu.c | 7 --- drivers/pci/intel-iommu.h | 61 +++++++++++++++++++ 3 files changed, 211 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 127764cfbe27..aba151ca6d26 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -28,6 +28,7 @@ #include #include +#include #include "iova.h" #include "intel-iommu.h" @@ -509,3 +510,152 @@ void free_iommu(struct intel_iommu *iommu) iounmap(iommu->reg); kfree(iommu); } + +/* + * Reclaim all the submitted descriptors which have completed its work. + */ +static inline void reclaim_free_desc(struct q_inval *qi) +{ + while (qi->desc_status[qi->free_tail] == QI_DONE) { + qi->desc_status[qi->free_tail] = QI_FREE; + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; + qi->free_cnt++; + } +} + +/* + * Submit the queued invalidation descriptor to the remapping + * hardware unit and wait for its completion. + */ +void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +{ + struct q_inval *qi = iommu->qi; + struct qi_desc *hw, wait_desc; + int wait_index, index; + unsigned long flags; + + if (!qi) + return; + + hw = qi->desc; + + spin_lock(&qi->q_lock); + while (qi->free_cnt < 3) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + index = qi->free_head; + wait_index = (index + 1) % QI_LENGTH; + + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + + hw[index] = *desc; + + wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + + hw[wait_index] = wait_desc; + + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); + + qi->free_head = (qi->free_head + 2) % QI_LENGTH; + qi->free_cnt -= 2; + + spin_lock_irqsave(&iommu->register_lock, flags); + /* + * update the HW tail register indicating the presence of + * new descriptors. + */ + writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + while (qi->desc_status[wait_index] != QI_DONE) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + qi->desc_status[index] = QI_DONE; + + reclaim_free_desc(qi); + spin_unlock(&qi->q_lock); +} + +/* + * Flush the global interrupt entry cache. + */ +void qi_global_iec(struct intel_iommu *iommu) +{ + struct qi_desc desc; + + desc.low = QI_IEC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. + */ +int dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 cmd, sts; + unsigned long flags; + struct q_inval *qi; + + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + /* + * queued invalidation is already setup and enabled. + */ + if (iommu->qi) + return 0; + + iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + + qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); + if (!qi->desc) { + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + spin_lock_init(&qi->q_lock); + + spin_lock_irqsave(&iommu->register_lock, flags); + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + cmd = iommu->gcmd | DMA_GCMD_QIE; + iommu->gcmd |= DMA_GCMD_QIE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fb701d9dd8c0..347bf2e47168 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -181,13 +181,6 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 3a650e8cba33..2983ce895353 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,6 +27,7 @@ #include #include "iova.h" #include +#include #include "dma_remapping.h" /* @@ -51,6 +52,10 @@ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define OFFSET_STRIDE (9) /* @@ -114,6 +119,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) @@ -131,6 +137,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + /* PMEN_REG */ #define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_PRS (((u32)1)<<0) @@ -140,6 +157,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) /* GSTS_REG */ @@ -147,6 +165,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) /* CCMD_REG */ @@ -192,6 +211,40 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) }\ } +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -212,8 +265,16 @@ struct intel_iommu { struct msi_msg saved_msg; struct sys_device sysdev; #endif + struct q_inval *qi; /* Queued invalidation info */ }; +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); -- cgit v1.2.3 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dma_remapping.h | 2 + drivers/pci/dmar.c | 16 +++++ drivers/pci/intel-iommu.c | 21 +++---- drivers/pci/intel-iommu.h | 24 +++++++- drivers/pci/intr_remapping.c | 137 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/intr_remapping.h | 2 + include/linux/dmar.h | 120 ++++++++++++++++++++++++++----------- 7 files changed, 272 insertions(+), 50 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h index 05aac8ef96c7..bff5c65f81dc 100644 --- a/drivers/pci/dma_remapping.h +++ b/drivers/pci/dma_remapping.h @@ -145,6 +145,8 @@ struct device_domain_info { extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int dmar_disabled; + #ifndef CONFIG_DMAR_GFX_WA static inline void iommu_prepare_gfx_mapping(void) { diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index aba151ca6d26..23a119e6485e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -449,6 +449,22 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } +void __init detect_intel_iommu(void) +{ + int ret; + + ret = early_dmar_detect(); + +#ifdef CONFIG_DMAR + { + if (ret && !no_iommu && !iommu_detected && !swiotlb && + !dmar_disabled) + iommu_detected = 1; + } +#endif +} + + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 347bf2e47168..ffccf2341b98 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -76,7 +76,7 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -2238,15 +2238,6 @@ static void __init iommu_exit_mempool(void) } -void __init detect_intel_iommu(void) -{ - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) - return; - if (early_dmar_detect()) { - iommu_detected = 1; - } -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2293,15 +2284,19 @@ int __init intel_iommu_init(void) { int ret = 0; - if (no_iommu || swiotlb || dmar_disabled) - return -ENODEV; - if (dmar_table_init()) return -ENODEV; if (dmar_dev_scope_init()) return -ENODEV; + /* + * Check the need for DMA-remapping initialization now. + * Above initialization will also be used by Interrupt-remapping. + */ + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 2983ce895353..a81a74e2bd9e 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -56,6 +56,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) /* @@ -157,16 +158,20 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -245,6 +250,16 @@ struct q_inval { int free_cnt; }; +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +struct ir_table { + struct irte *base; +}; +#endif + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -266,6 +281,9 @@ struct intel_iommu { struct sys_device sysdev; #endif struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif }; static inline void __iommu_flush_cache( @@ -279,5 +297,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index a80b87921c68..3d10cdc90314 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -1,10 +1,147 @@ #include +#include +#include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; +int intr_remapping_enabled; + +static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) +{ + u64 addr; + u32 cmd, sts; + unsigned long flags; + + addr = virt_to_phys((void *)iommu->ir_table->base); + + spin_lock_irqsave(&iommu->register_lock, flags); + + dmar_writeq(iommu->reg + DMAR_IRTA_REG, + (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); + + /* Set interrupt-remapping table pointer */ + cmd = iommu->gcmd | DMA_GCMD_SIRTP; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRTPS), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* + * global invalidation of interrupt entry cache before enabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + spin_lock_irqsave(&iommu->register_lock, flags); + + /* Enable interrupt-remapping */ + cmd = iommu->gcmd | DMA_GCMD_IRE; + iommu->gcmd |= DMA_GCMD_IRE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRES), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + + +static int setup_intr_remapping(struct intel_iommu *iommu, int mode) +{ + struct ir_table *ir_table; + struct page *pages; + + ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), + GFP_KERNEL); + + if (!iommu->ir_table) + return -ENOMEM; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + + if (!pages) { + printk(KERN_ERR "failed to allocate pages of order %d\n", + INTR_REMAP_PAGE_ORDER); + kfree(iommu->ir_table); + return -ENOMEM; + } + + ir_table->base = page_address(pages); + + iommu_set_intr_remapping(iommu, mode); + return 0; +} + +int __init enable_intr_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + int setup = 0; + + /* + * check for the Interrupt-remapping support + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (eim && !ecap_eim_support(iommu->ecap)) { + printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " + " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); + return -1; + } + } + + /* + * Enable queued invalidation for all the DRHD's. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_enable_qi(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable queued, " + " invalidation, ecap %Lx, ret %d\n", + drhd->reg_base_addr, iommu->ecap, ret); + return -1; + } + } + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (setup_intr_remapping(iommu, eim)) + goto error; + + setup = 1; + } + + if (!setup) + goto error; + + intr_remapping_enabled = 1; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index c4a40b2f33fa..05f2635bbe4e 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -4,3 +4,5 @@ struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; }; + +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb29617..8a0238dd2c11 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v1.2.3 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/intel-iommu.h | 4 + drivers/pci/intr_remapping.c | 243 +++++++++++++++++++++++++++++++++++++++++++ include/linux/dmar.h | 12 +++ 3 files changed, 259 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index a81a74e2bd9e..2142c01e0143 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -123,6 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) /* IOTLB_REG */ @@ -255,6 +256,8 @@ struct q_inval { #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_ENTRIES 65536 + struct ir_table { struct irte *base; }; @@ -300,4 +303,5 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3d10cdc90314..bddb4b19b6c7 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" @@ -10,6 +11,248 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static struct { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +} irq_2_iommu[NR_IRQS]; + +static DEFINE_SPINLOCK(irq_2_ir_lock); + +int irq_remapped(int irq) +{ + if (irq > NR_IRQS) + return 0; + + if (!irq_2_iommu[irq].iommu) + return 0; + + return 1; +} + +int get_irte(int irq, struct irte *entry) +{ + int index; + + if (!entry || irq > NR_IRQS) + return -1; + + spin_lock(&irq_2_ir_lock); + if (!irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + struct ir_table *table = iommu->ir_table; + u16 index, start_index; + unsigned int mask = 0; + int i; + + if (!count) + return -1; + + /* + * start the IRTE search from index 0. + */ + index = start_index = 0; + + if (count > 1) { + count = __roundup_pow_of_two(count); + mask = ilog2(count); + } + + if (mask > ecap_max_handle_mask(iommu->ecap)) { + printk(KERN_ERR + "Requested mask %x exceeds the max invalidation handle" + " mask value %Lx\n", mask, + ecap_max_handle_mask(iommu->ecap)); + return -1; + } + + spin_lock(&irq_2_ir_lock); + do { + for (i = index; i < index + count; i++) + if (table->base[i].present) + break; + /* empty index found */ + if (i == index + count) + break; + + index = (index + count) % INTR_REMAP_TABLE_ENTRIES; + + if (index == start_index) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate an IRTE\n"); + return -1; + } + } while (1); + + for (i = index; i < index + count; i++) + table->base[i].present = 1; + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = mask; + + spin_unlock(&irq_2_ir_lock); + + return index; +} + +static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +{ + struct qi_desc desc; + + desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + | QI_IEC_SELECTIVE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + int index; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + *sub_handle = irq_2_iommu[irq].sub_handle; + index = irq_2_iommu[irq].irte_index; + spin_unlock(&irq_2_ir_lock); + return index; +} + +int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = subhandle; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int modify_irte(int irq, struct irte *irte_modified) +{ + int index; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + qi_flush_iec(iommu, index, 0); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int flush_irte(int irq) +{ + int index; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int free_irte(int irq) +{ + int index, i; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + if (!irq_2_iommu[irq].sub_handle) { + for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) + set_64bit((unsigned long *)irte, 0); + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238dd2c11..324bbca85a26 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v1.2.3 From 1cb11583a6c4ceda7426eb36f7bf0419da8dfbc2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:51 -0700 Subject: x64, x2apic/intr-remap: disable DMA-remapping if Interrupt-remapping is detected (temporary quirk) Interrupt-remapping enables queued invalidation. And once queued invalidation is enabled, IOTLB invalidation also needs to use the queued invalidation mechanism and the register based IOTLB invalidation doesn't work. For now, Support for IOTLB invalidation using queued invalidation is missing. Meanwhile, disable DMA-remapping, if Interrupt-remapping support is detected. For the meanwhile, if someone wants to really enable DMA-remapping, they can use nox2apic, which will disable interrupt-remapping and as such doesn't enable queued invalidation. And given that none of the release platforms support intr-remapping yet, we should be ok for this temporary hack. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 23a119e6485e..bd2c01674f5e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -457,6 +457,31 @@ void __init detect_intel_iommu(void) #ifdef CONFIG_DMAR { + struct acpi_table_dmar *dmar; + /* + * for now we will disable dma-remapping when interrupt + * remapping is enabled. + * When support for queued invalidation for IOTLB invalidation + * is added, we will not need this any more. + */ + dmar = (struct acpi_table_dmar *) dmar_tbl; + if (ret && cpu_has_x2apic && dmar->flags & 0x1) { + printk(KERN_INFO + "Queued invalidation will be enabled to support " + "x2apic and Intr-remapping.\n"); + printk(KERN_INFO + "Disabling IOMMU detection, because of missing " + "queued invalidation support for IOTLB " + "invalidation\n"); + printk(KERN_INFO + "Use \"nox2apic\", if you want to use Intel " + " IOMMU for DMA-remapping and don't care about " + " x2apic support\n"); + + dmar_disabled = 1; + return; + } + if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; -- cgit v1.2.3 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 + arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++++++++++--- drivers/pci/intr_remapping.c | 10 ++ include/asm-x86/apic.h | 9 ++ include/asm-x86/io_apic.h | 14 ++ include/asm-x86/irq_remapping.h | 8 ++ include/linux/dmar.h | 1 + 7 files changed, 321 insertions(+), 22 deletions(-) create mode 100644 include/asm-x86/irq_remapping.h (limited to 'drivers/pci') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef78e12a..d5c06917b5b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42ef9283..9bd02ef049a0 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bddb4b19b6c7..32e55c7a9805 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -220,6 +220,16 @@ int flush_irte(int irq) return 0; } +struct intel_iommu *map_ioapic_to_ir(int apic) +{ + int i; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].id == apic) + return ir_ioapic[i].iommu; + return NULL; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index bb54928373ca..aa746704a5c9 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -134,6 +134,15 @@ extern int get_physical_broadcast(void); # define apic_write_around(x, y) apic_write_atomic((x), (y)) #endif +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + static inline void ack_APIC_irq(void) { /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 1c4a99d882f5..8dc2622714c8 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -107,6 +107,20 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + #ifdef CONFIG_X86_IO_APIC /* diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h new file mode 100644 index 000000000000..78242c6ffa58 --- /dev/null +++ b/include/asm-x86/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IRQ_REMAPPING_H +#define _ASM_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca85a26..bf41ffa74705 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v1.2.3 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++-- drivers/pci/intr_remapping.c | 11 +++ include/asm-x86/msidef.h | 4 + include/linux/dmar.h | 1 + 4 files changed, 238 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef049a0..877aa2e9d7e8 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,10 +2318,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 32e55c7a9805..bb642cc5e18c 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic) return NULL; } +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd; + + drhd = dmar_find_matched_drhd_unit(dev); + if (!drhd) + return NULL; + + return drhd->iommu; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29ce426d..57fd85935e5a 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -48,4 +48,8 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_MSIDEF_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa74705..c360c558e59e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v1.2.3 From e51af6630848406fc97adbd71443818cdcda297b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Sep 2008 09:54:37 +0100 Subject: x86: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. I'd still be grateful if someone could test it on a DG33BU with the old BIOS though, since I've killed mine. I tested the DMI version, but not this one. Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++ drivers/pci/intel-iommu.c | 2 +- include/asm-x86/iommu.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..24bb5faf5efa 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) } +#ifdef CONFIG_DMAR +static void __init intel_g33_dmar(int num, int slot, int func) +{ + struct acpi_table_header *dmar_tbl; + acpi_status status; + + status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); + if (ACPI_SUCCESS(status)) { + printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); + dmar_disabled = 1; + } +} +#endif + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, +#ifdef CONFIG_DMAR + { PCI_VENDOR_ID_INTEL, 0x29c0, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, +#endif {} }; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8d0e60ac849c..eaba6ecc2adb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -80,7 +80,7 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 5f888cc5be49..621a1af94c4c 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int dmar_disabled; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From cbda1ba898647aeb4ee770b803c922f595e97731 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 7 Sep 2008 16:35:26 +0100 Subject: PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. Signed-off-by: David Woodhouse Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- drivers/pci/intel-iommu.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8d0e60ac849c..c3edcdc08e72 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2348,11 +2348,34 @@ static void __init iommu_exit_mempool(void) } +static int blacklist_iommu(const struct dmi_system_id *id) +{ + printk(KERN_INFO "%s detected; disabling IOMMU\n", + id->ident); + dmar_disabled = 1; + return 0; +} + +static struct dmi_system_id __initdata intel_iommu_dmi_table[] = { + { /* Some DG33BU BIOS revisions advertised non-existent VT-d */ + .callback = blacklist_iommu, + .ident = "Intel DG33BU", + { DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "DG33BU"), + } + }, + { } +}; + + void __init detect_intel_iommu(void) { if (swiotlb || no_iommu || iommu_detected || dmar_disabled) return; if (early_dmar_detect()) { + dmi_check_system(intel_iommu_dmi_table); + if (dmar_disabled) + return; iommu_detected = 1; } } -- cgit v1.2.3 From 4ca8a7726fb0e8094fdb56f2ae2d69fcf9254eae Mon Sep 17 00:00:00 2001 From: Johann Felix Soden Date: Fri, 22 Aug 2008 20:46:59 +0200 Subject: PCI: Fix printk warnings in probe.c The cleaned up resource code in probe.c introduced some warnings: drivers/pci/probe.c: In function 'pci_read_bridge_bases': drivers/pci/probe.c:386: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'resource_size_t' drivers/pci/probe.c:386: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'resource_size_t' drivers/pci/probe.c:398: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'resource_size_t' drivers/pci/probe.c:398: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'resource_size_t' drivers/pci/probe.c:434: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'resource_size_t' drivers/pci/probe.c:434: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'resource_size_t' So fix them up. Signed-off-by: Johann Felix Soden Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index cce2f4cb1fbf..0ad936765b26 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -383,7 +383,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->start = base; if (!res->end) res->end = limit + 0xfff; - printk(KERN_INFO "PCI: bridge %s io port: [%llx, %llx]\n", pci_name(dev), res->start, res->end); + printk(KERN_INFO "PCI: bridge %s io port: [%llx, %llx]\n", + pci_name(dev), (unsigned long long) res->start, + (unsigned long long) res->end); } res = child->resource[1]; @@ -395,7 +397,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; res->end = limit + 0xfffff; - printk(KERN_INFO "PCI: bridge %s 32bit mmio: [%llx, %llx]\n", pci_name(dev), res->start, res->end); + printk(KERN_INFO "PCI: bridge %s 32bit mmio: [%llx, %llx]\n", pci_name(dev), + (unsigned long long) res->start, (unsigned long long) res->end); } res = child->resource[2]; @@ -431,7 +434,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; res->start = base; res->end = limit + 0xfffff; - printk(KERN_INFO "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n", pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64)?"64":"32",res->start, res->end); + printk(KERN_INFO "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n", + pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32", + (unsigned long long) res->start, (unsigned long long) res->end); } } -- cgit v1.2.3 From 53bc88701be91e7f631ad31418b32392aa952e9b Mon Sep 17 00:00:00 2001 From: Johann Felix Soden Date: Fri, 22 Aug 2008 20:25:10 +0200 Subject: PCI: Fix printk warnings in setup-bus.c Again, the cleaned up code introduced some resource warnings: drivers/pci/setup-bus.c: In function 'pci_bus_dump_res': drivers/pci/setup-bus.c:542: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'resource_size_t' drivers/pci/setup-bus.c:542: warning: format '%llx' expects type 'long long unsigned int', but argument 6 has type 'resource_size_t' Fix those up too. Signed-off-by: Johann Felix Soden Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 82634a2f1b1d..c6ea4e97e8b6 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -539,7 +539,11 @@ static void pci_bus_dump_res(struct pci_bus *bus) if (!res) continue; - printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n", bus->number, i, (res->flags & IORESOURCE_IO)? "io port":"mmio", res->start, res->end); + printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n", + bus->number, i, + (res->flags & IORESOURCE_IO) ? "io port" : "mmio", + (unsigned long long) res->start, + (unsigned long long) res->end); } } -- cgit v1.2.3 From 48902025af4da44dbbc0231061e542d6d40e712e Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Fri, 5 Sep 2008 14:05:03 -0700 Subject: PCI Hotplug: fakephp: fix deadlock... again Commit fe99740cac117f208707488c03f3789cf4904957 (construct one fakephp slot per PCI slot) introduced a regression, causing a deadlock when removing a PCI device. We also never actually removed the device from the PCI core. So we: - remove the device from the PCI core - do not directly call remove_slot() to prevent deadlock Yu Zhao reported and diagnosed this defect. Signed-off-by: Alex Chiang Acked-by: Yu Zhao Cc: Matthew Wilcox Cc: Kristen Carlson Accardi Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 40337a06c18a..146ca9cd1567 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -320,15 +320,15 @@ static int disable_slot(struct hotplug_slot *slot) return -ENODEV; } + /* remove the device from the pci core */ + pci_remove_bus_device(dev); + /* queue work item to blow away this sysfs entry and other * parts. */ INIT_WORK(&dslot->remove_work, remove_slot_worker); queue_work(dummyphp_wq, &dslot->remove_work); - /* blow away this sysfs entry and other parts. */ - remove_slot(dslot); - pci_dev_put(dev); } return 0; -- cgit v1.2.3 From a5827f40afafc864f57a1c44915f0bfaf3d94f53 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 28 Aug 2008 01:05:26 +0300 Subject: PCI: fix pciehp_free_irq() This patch fixes an obvious bug (loop was never entered) caused by commit 820943b6fc4781621dee52ba026106758a727dd3 (pciehp: cleanup pcie_poll_cmd). Reported-by: Adrian Bunk Signed-off-by: Adrian Bunk Acked-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index ab31f5ba665d..9d934ddee956 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -258,7 +258,7 @@ static int pcie_poll_cmd(struct controller *ctrl) return 1; } } - while (timeout > 1000) { + while (timeout > 0) { msleep(10); timeout -= 10; if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { -- cgit v1.2.3 From 395a125c6237802b19bf22b41017c99ff57f1024 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 12:27:52 -0700 Subject: PCI: re-add debug prints for unmodified BARs Print out for device BAR values before the kernel tries to update them. Also make related output use KERN_DEBUG. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0ad936765b26..36698e57b97f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -304,6 +304,9 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } else { res->start = l64; res->end = l64 + sz64; + printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n", + pci_name(dev), pos, (unsigned long long)res->start, + (unsigned long long)res->end); } } else { sz = pci_size(l, sz, mask); @@ -313,6 +316,9 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->start = l; res->end = l + sz; + printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev), + pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio", + (unsigned long long)res->start, (unsigned long long)res->end); } out: @@ -383,7 +389,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->start = base; if (!res->end) res->end = limit + 0xfff; - printk(KERN_INFO "PCI: bridge %s io port: [%llx, %llx]\n", + printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n", pci_name(dev), (unsigned long long) res->start, (unsigned long long) res->end); } @@ -397,8 +403,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; res->end = limit + 0xfffff; - printk(KERN_INFO "PCI: bridge %s 32bit mmio: [%llx, %llx]\n", pci_name(dev), - (unsigned long long) res->start, (unsigned long long) res->end); + printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n", + pci_name(dev), (unsigned long long) res->start, + (unsigned long long) res->end); } res = child->resource[2]; @@ -434,7 +441,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; res->start = base; res->end = limit + 0xfffff; - printk(KERN_INFO "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n", + printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n", pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32", (unsigned long long) res->start, (unsigned long long) res->end); } -- cgit v1.2.3 From e1f4f59d1ab9ebac44830d6ae450fb358ac559d3 Mon Sep 17 00:00:00 2001 From: Sitsofe Wheeler Date: Tue, 16 Sep 2008 14:27:13 +0100 Subject: PCI: Fix pcie_aspm=force pcie_aspm=force did not work because aspm_force was being double negated leading to the sanity check failing. Moving a bracket should fix this. Acked-by: Alan Cox Signed-off-by: Sitsofe Wheeler Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 9a7c9e1408a4..851f5b83cdbc 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -527,7 +527,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) */ pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP, ®32); - if (!(reg32 & PCI_EXP_DEVCAP_RBER && !aspm_force)) { + if (!(reg32 & PCI_EXP_DEVCAP_RBER) && !aspm_force) { printk("Pre-1.1 PCIe device detected, " "disable ASPM for %s. It can be enabled forcedly" " with 'pcie_aspm=force'\n", pci_name(pdev)); -- cgit v1.2.3 From b08508c40adf3fd1330aabc4f37d3254179776c4 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Tue, 26 Aug 2008 08:20:34 -0700 Subject: PCI: fix compiler warnings in pci_get_subsys() pci_get_subsys() changed in 2.6.26 so that the from pointer is modified when the call is being invoked, so fix up the 'const' marking of it that the compiler is complaining about. Reported-by: Rufus & Azrael Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes --- drivers/pci/search.c | 6 +++--- include/linux/pci.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 3b3b5f178797..4edfc4731bd4 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL(pci_find_slot); * time. */ struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from) + struct pci_dev *from) { struct pci_dev *pdev; @@ -263,7 +263,7 @@ static int match_pci_dev_by_id(struct device *dev, void *data) * this file. */ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, - const struct pci_dev *from) + struct pci_dev *from) { struct device *dev; struct device *dev_start = NULL; @@ -303,7 +303,7 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, */ struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from) + struct pci_dev *from) { struct pci_dev *pdev; struct pci_device_id *id; diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e14008a3c2..98dc6243a706 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -534,7 +534,7 @@ extern void pci_sort_breadthfirst(void); #ifdef CONFIG_PCI_LEGACY struct pci_dev __deprecated *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev __deprecated *pci_find_slot(unsigned int bus, unsigned int devfn); #endif /* CONFIG_PCI_LEGACY */ @@ -550,7 +550,7 @@ struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from); + struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); @@ -816,7 +816,7 @@ _PCI_NOP_ALL(write,) static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } @@ -838,7 +838,7 @@ static inline struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from) + struct pci_dev *from) { return NULL; } -- cgit v1.2.3 From b5ff7df3df9efab511244d5a299fce706c71af48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Oct 2008 18:52:51 -0700 Subject: Check mapped ranges on sysfs resource files This is loosely based on a patch by Jesse Barnes to check the user-space PCI mappings though the sysfs interfaces. Quoting Jesse's original explanation: It's fairly common for applications to map PCI resources through sysfs. However, with the current implementation, it's possible for an application to map far more than the range corresponding to the resourceN file it opened. This patch plugs that hole by checking the range at mmap time, similar to what is done on platforms like sparc64 in their lower level PCI remapping routines. It was initially put together to help debug the e1000e NVRAM corruption problem, since we initially thought an X driver might be walking past the end of one of its mappings and clobbering the NVRAM. It now looks like that's not the case, but doing the check is still important for obvious reasons. and this version of the patch differs in that it uses a helper function to clarify the code, and does all the checks in pages (instead of bytes) in order to avoid overflows when doing "<< PAGE_SHIFT" etc. Acked-by: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci-sysfs.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9c718583a237..77baff022f71 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -484,6 +485,21 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, #endif /* HAVE_PCI_LEGACY */ #ifdef HAVE_PCI_MMAP + +static int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +{ + unsigned long nr, start, size; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = pci_resource_len(pdev, resno) >> PAGE_SHIFT; + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", + current->comm, start, start+nr, pci_name(pdev), resno, size); + return 0; +} + /** * pci_mmap_resource - map a PCI resource into user memory space * @kobj: kobject for mapping @@ -510,6 +526,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, if (i >= PCI_ROM_RESOURCE) return -ENODEV; + if (!pci_mmap_fits(pdev, i, vma)) + return -EINVAL; + /* pci_mmap_page_range() expects the same kind of entry as coming * from /proc/bus/pci/ which is a "user visible" value. If this is * different from the resource itself, arch will do necessary fixup. -- cgit v1.2.3