From 3569dd07aaad71920c5ea4da2d5cc9a167c1ffd4 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 21 Nov 2018 15:29:33 -0800 Subject: iommu/vt-d: Handle domain agaw being less than iommu agaw The Intel IOMMU driver opportunistically skips a few top level page tables from the domain paging directory while programming the IOMMU context entry. However there is an implicit assumption in the code that domain's adjusted guest address width (agaw) would always be greater than IOMMU's agaw. The IOMMU capabilities in an upcoming platform cause the domain's agaw to be lower than IOMMU's agaw. The issue is seen when the IOMMU supports both 4-level and 5-level paging. The domain builds a 4-level page table based on agaw of 2. However the IOMMU's agaw is set as 3 (5-level). In this case the code incorrectly tries to skip page page table levels. This causes the IOMMU driver to avoid programming the context entry. The fix handles this case and programs the context entry accordingly. Fixes: de24e55395698 ("iommu/vt-d: Simplify domain_context_mapping_one") Cc: Cc: Ashok Raj Cc: Jacob Pan Cc: Lu Baolu Reviewed-by: Lu Baolu Reported-by: Ramos Falcon, Ernesto R Tested-by: Ricardo Neri Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f3ccf025108b..fdf79baf1d79 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2044,7 +2044,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * than default. Unnecessary for PT mode. */ if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { ret = -ENOMEM; pgd = phys_to_virt(dma_pte_addr(pgd)); if (!dma_pte_present(pgd)) @@ -2058,7 +2058,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, iommu->agaw); + context_set_address_width(context, agaw); } else { /* * In pass through mode, AW must be programmed to -- cgit v1.2.3 From 765b6a98c1de3d84dfdae344cc4ee4c24d9447f7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:55 +0800 Subject: iommu/vt-d: Enumerate the scalable mode capability The Intel vt-d spec rev3.0 introduces a new translation mode called scalable mode, which enables PASID-granular translations for first level, second level, nested and pass-through modes. At the same time, the previous Extended Context (ECS) mode is deprecated (no production ever implements ECS). This patch adds enumeration for Scalable Mode and removes the deprecated ECS enumeration. It provides a boot time option to disable scalable mode even hardware claims to support it. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 12 ++--- drivers/iommu/intel-iommu.c | 64 +++++++------------------ include/linux/intel-iommu.h | 1 + 3 files changed, 24 insertions(+), 53 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 81d1d5a74728..abe9769a9276 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1682,12 +1682,12 @@ By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. - ecs_off [Default Off] - By default, extended context tables will be supported if - the hardware advertises that it has support both for the - extended tables themselves, and also PASID support. With - this option set, extended tables will not be used even - on hardware which claims to support them. + sm_off [Default Off] + By default, scalable mode will be supported if the + hardware advertises that it has support for the scalable + mode translation. With this option set, scalable mode + will not be used even on hardware which claims to support + it. tboot_noforce [Default Off] Do not force the Intel IOMMU enabled under tboot. By default, tboot will force Intel IOMMU on, which diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fdf79baf1d79..2b9784a1887b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -405,38 +405,16 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; -static int intel_iommu_ecs = 1; -static int intel_iommu_pasid28; +static int intel_iommu_sm = 1; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -/* Broadwell and Skylake have broken ECS support — normal so-called "second - * level" translation of DMA requests-without-PASID doesn't actually happen - * unless you also set the NESTE bit in an extended context-entry. Which of - * course means that SVM doesn't work because it's trying to do nested - * translation of the physical addresses it finds in the process page tables, - * through the IOVA->phys mapping found in the "second level" page tables. - * - * The VT-d specification was retroactively changed to change the definition - * of the capability bits and pretend that Broadwell/Skylake never happened... - * but unfortunately the wrong bit was changed. It's ECS which is broken, but - * for some reason it was the PASID capability bit which was redefined (from - * bit 28 on BDW/SKL to bit 40 in future). - * - * So our test for ECS needs to eschew those implementations which set the old - * PASID capabiity bit 28, since those are the ones on which ECS is broken. - * Unless we are working around the 'pasid28' limitations, that is, by putting - * the device into passthrough mode for normal DMA and thus masking the bug. - */ -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) -/* PASID support is thus enabled if ECS is enabled and *either* of the old - * or new capability bits are set. */ -#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ - (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) +#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) +#define pasid_supported(iommu) (sm_supported(iommu) && \ + ecap_pasid((iommu)->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -516,15 +494,9 @@ static int __init intel_iommu_setup(char *str) } else if (!strncmp(str, "sp_off", 6)) { pr_info("Disable supported super page\n"); intel_iommu_superpage = 0; - } else if (!strncmp(str, "ecs_off", 7)) { - printk(KERN_INFO - "Intel-IOMMU: disable extended context table support\n"); - intel_iommu_ecs = 0; - } else if (!strncmp(str, "pasid28", 7)) { - printk(KERN_INFO - "Intel-IOMMU: enable pre-production PASID support\n"); - intel_iommu_pasid28 = 1; - iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "sm_off", 6)) { + pr_info("Intel-IOMMU: disable scalable mode support\n"); + intel_iommu_sm = 0; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); @@ -771,7 +743,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u64 *entry; entry = &root->lo; - if (ecs_enabled(iommu)) { + if (sm_supported(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -913,7 +885,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecs_enabled(iommu)) + if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1265,8 +1237,6 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecs_enabled(iommu)) - addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1755,7 +1725,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); intel_svm_exit(iommu); @@ -2464,8 +2434,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; - if (ecs_enabled(iommu)) { - if (pasid_enabled(iommu)) { + if (sm_supported(iommu)) { + if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); if (features >= 0) info->pasid_supported = features | 1; @@ -3277,7 +3247,7 @@ static int __init init_dmars(void) * We need to ensure the system pasid table is no bigger * than the smallest supported. */ - if (pasid_enabled(iommu)) { + if (pasid_supported(iommu)) { u32 temp = 2 << ecap_pss(iommu->ecap); intel_pasid_max_id = min_t(u32, temp, @@ -3338,7 +3308,7 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif } @@ -3442,7 +3412,7 @@ domains_done: iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto free_iommu; @@ -4331,7 +4301,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) goto out; #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_supported(iommu)) intel_svm_init(iommu); #endif @@ -4348,7 +4318,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu_flush_write_buffer(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ret = intel_svm_enable_prq(iommu); if (ret) goto disable_iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a58bc05d6798..8c9b6063d275 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -177,6 +177,7 @@ * Extended Capability Register */ +#define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) -- cgit v1.2.3 From 0bbeb01a4fafbf8422e5c8882d461d6ac4f71e15 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:56 +0800 Subject: iommu/vt-d: Manage scalalble mode PASID tables In scalable mode, pasid structure is a two level table with a pasid directory table and a pasid table. Any pasid entry can be identified by a pasid value in below way. 1 9 6 5 0 .-----------------------.-------. | PASID | | '-----------------------'-------' .-------------. | | | | | | | | | | | | | .-----------. | .-------------. | | | |----->| PASID Entry | | | | | '-------------' | | | |Plus | | | .-----------. | | | |---->| DIR Entry |-------->| | | '-----------' '-------------' .---------. |Plus | | | Context | | | | | Entry |------->| | '---------' '-----------' This changes the pasid table APIs to support scalable mode PASID directory and PASID table. It also adds a helper to get the PASID table entry according to the pasid value. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 23 +++++++----- drivers/iommu/intel-pasid.c | 87 +++++++++++++++++++++++++++++++++++++-------- drivers/iommu/intel-pasid.h | 12 +++++-- drivers/iommu/intel-svm.c | 6 +--- 4 files changed, 97 insertions(+), 31 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2b9784a1887b..5552a1aaf5ea 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -425,21 +425,24 @@ static LIST_HEAD(device_domain_list); /* * Iterate over elements in device_domain_list and call the specified - * callback @fn against each element. This helper should only be used - * in the context where the device_domain_lock has already been holden. + * callback @fn against each element. */ int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data) { int ret = 0; + unsigned long flags; struct device_domain_info *info; - assert_spin_locked(&device_domain_lock); + spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &device_domain_list, global) { ret = fn(info, data); - if (ret) + if (ret) { + spin_unlock_irqrestore(&device_domain_lock, flags); return ret; + } } + spin_unlock_irqrestore(&device_domain_lock, flags); return 0; } @@ -2481,16 +2484,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, list_add(&info->global, &device_domain_list); if (dev) dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); - if (dev && dev_is_pci(dev) && info->pasid_supported) { + /* PASID table is mandatory for a PCI device in scalable mode. */ + if (dev && dev_is_pci(dev) && sm_supported(iommu)) { ret = intel_pasid_alloc_table(dev); if (ret) { - pr_warn("No pasid table for %s, pasid disabled\n", - dev_name(dev)); - info->pasid_supported = 0; + pr_err("PASID table allocation for %s failed\n", + dev_name(dev)); + dmar_remove_one_dev_info(domain, dev); + return NULL; } } - spin_unlock_irqrestore(&device_domain_lock, flags); if (dev && domain_context_mapping(domain, dev)) { pr_err("Domain context map for %s failed\n", dev_name(dev)); diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index fe95c9bd4d33..fd3ccc0753b0 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -123,12 +123,13 @@ int intel_pasid_alloc_table(struct device *dev) struct pasid_table *pasid_table; struct pasid_table_opaque data; struct page *pages; - size_t size, count; + int max_pasid = 0; int ret, order; + int size; + might_sleep(); info = dev->archdata.iommu; - if (WARN_ON(!info || !dev_is_pci(dev) || - !info->pasid_supported || info->pasid_table)) + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; /* DMA alias device already has a pasid table, use it: */ @@ -138,23 +139,25 @@ int intel_pasid_alloc_table(struct device *dev) if (ret) goto attach_out; - pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC); + pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) return -ENOMEM; INIT_LIST_HEAD(&pasid_table->dev); - size = sizeof(struct pasid_entry); - count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id); - order = get_order(size * count); + if (info->pasid_supported) + max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)), + intel_pasid_max_id); + + size = max_pasid >> (PASID_PDE_SHIFT - 3); + order = size ? get_order(size) : 0; pages = alloc_pages_node(info->iommu->node, - GFP_ATOMIC | __GFP_ZERO, - order); + GFP_KERNEL | __GFP_ZERO, order); if (!pages) return -ENOMEM; pasid_table->table = page_address(pages); pasid_table->order = order; - pasid_table->max_pasid = count; + pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); attach_out: device_attach_pasid_table(info, pasid_table); @@ -162,14 +165,33 @@ attach_out: return 0; } +/* Get PRESENT bit of a PASID directory entry. */ +static inline bool +pasid_pde_is_present(struct pasid_dir_entry *pde) +{ + return READ_ONCE(pde->val) & PASID_PTE_PRESENT; +} + +/* Get PASID table from a PASID directory entry. */ +static inline struct pasid_entry * +get_pasid_table_from_pde(struct pasid_dir_entry *pde) +{ + if (!pasid_pde_is_present(pde)) + return NULL; + + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK); +} + void intel_pasid_free_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; + struct pasid_entry *table; + int i, max_pde; info = dev->archdata.iommu; - if (!info || !dev_is_pci(dev) || - !info->pasid_supported || !info->pasid_table) + if (!info || !dev_is_pci(dev) || !info->pasid_table) return; pasid_table = info->pasid_table; @@ -178,6 +200,14 @@ void intel_pasid_free_table(struct device *dev) if (!list_empty(&pasid_table->dev)) return; + /* Free scalable mode PASID directory tables: */ + dir = pasid_table->table; + max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; + for (i = 0; i < max_pde; i++) { + table = get_pasid_table_from_pde(&dir[i]); + free_pgtable_page(table); + } + free_pages((unsigned long)pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -206,17 +236,37 @@ int intel_pasid_get_dev_max_id(struct device *dev) struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) { + struct device_domain_info *info; struct pasid_table *pasid_table; + struct pasid_dir_entry *dir; struct pasid_entry *entries; + int dir_index, index; pasid_table = intel_pasid_get_table(dev); if (WARN_ON(!pasid_table || pasid < 0 || pasid >= intel_pasid_get_dev_max_id(dev))) return NULL; - entries = pasid_table->table; + dir = pasid_table->table; + info = dev->archdata.iommu; + dir_index = pasid >> PASID_PDE_SHIFT; + index = pasid & PASID_PTE_MASK; + + spin_lock(&pasid_lock); + entries = get_pasid_table_from_pde(&dir[dir_index]); + if (!entries) { + entries = alloc_pgtable_page(info->iommu->node); + if (!entries) { + spin_unlock(&pasid_lock); + return NULL; + } + + WRITE_ONCE(dir[dir_index].val, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + } + spin_unlock(&pasid_lock); - return &entries[pasid]; + return &entries[index]; } /* @@ -224,7 +274,14 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) */ static inline void pasid_clear_entry(struct pasid_entry *pe) { - WRITE_ONCE(pe->val, 0); + WRITE_ONCE(pe->val[0], 0); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); } void intel_pasid_clear_entry(struct device *dev, int pasid) diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 1fb5e12b029a..12f480c2bb8b 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -11,12 +11,20 @@ #define __INTEL_PASID_H #define PASID_MIN 0x1 -#define PASID_MAX 0x20000 +#define PASID_MAX 0x100000 +#define PASID_PTE_MASK 0x3F +#define PASID_PTE_PRESENT 1 +#define PDE_PFN_MASK PAGE_MASK +#define PASID_PDE_SHIFT 6 -struct pasid_entry { +struct pasid_dir_entry { u64 val; }; +struct pasid_entry { + u64 val[8]; +}; + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index db301efe126d..d6c99935d5d9 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu) order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); if (ecap_dis(iommu->ecap)) { - /* Just making it explicit... */ - BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry)); pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (pages) iommu->pasid_state_table = page_address(pages); @@ -405,9 +403,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_entry_val |= PASID_ENTRY_FLPM_5LP; entry = intel_pasid_get_entry(dev, svm->pasid); - entry->val = pasid_entry_val; - - wmb(); + WRITE_ONCE(entry->val[0], pasid_entry_val); /* * Flush PASID cache when a PASID table entry becomes -- cgit v1.2.3 From 4f2ed183cfebf42b29ed8fe442169de97bc0fe61 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:57 +0800 Subject: iommu/vt-d: Move page table helpers into header So that they could also be used in other source files. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 43 ------------------------------------------- include/linux/intel-iommu.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 43 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5552a1aaf5ea..d55254abd5ff 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -290,49 +290,6 @@ static inline void context_clear_entry(struct context_entry *context) context->hi = 0; } -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-10: available - * 11: snoop behavior - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; - -static inline void dma_clear_pte(struct dma_pte *pte) -{ - pte->val = 0; -} - -static inline u64 dma_pte_addr(struct dma_pte *pte) -{ -#ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK; -#else - /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; -#endif -} - -static inline bool dma_pte_present(struct dma_pte *pte) -{ - return (pte->val & 3) != 0; -} - -static inline bool dma_pte_superpage(struct dma_pte *pte) -{ - return (pte->val & DMA_PTE_LARGE_PAGE); -} - -static inline int first_pte_in_page(struct dma_pte *pte) -{ - return !((unsigned long)pte & ~VTD_PAGE_MASK); -} - /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 8c9b6063d275..b4da61385ebf 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -590,6 +590,49 @@ static inline void __iommu_flush_cache( clflush_cache_range(addr, size); } +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-10: available + * 11: snoop behavior + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; + +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} + +static inline bool dma_pte_superpage(struct dma_pte *pte) +{ + return (pte->val & DMA_PTE_LARGE_PAGE); +} + +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); -- cgit v1.2.3 From 3b33d4ab3217337f648f7ff3e57e755e6444c748 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:58:59 +0800 Subject: iommu/vt-d: Reserve a domain id for FL and PT modes Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid entry for first-level or pass-through translation should be programmed with a domain id different from those used for second-level or nested translation. It is recommended that software could use a same domain id for all first-only and pass-through translations. This reserves a domain id for first-level and pass-through translations. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 10 ++++++++++ drivers/iommu/intel-pasid.h | 6 ++++++ 2 files changed, 16 insertions(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d55254abd5ff..9818aaf2d0f7 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1621,6 +1621,16 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ set_bit(0, iommu->domain_ids); + /* + * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid + * entry for first-level or pass-through translation modes should + * be programmed with a domain id different from those used for + * second-level or nested translation. We reserve a domain id for + * this purpose. + */ + if (sm_supported(iommu)) + set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); + return 0; } diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 12f480c2bb8b..03c1612d173c 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -17,6 +17,12 @@ #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 +/* + * Domain ID reserved for pasid entries programmed for first-level + * only and pass-through transfer modes. + */ +#define FLPT_DEFAULT_DID 1 + struct pasid_dir_entry { u64 val; }; -- cgit v1.2.3 From 6f7db75e1c469057fe7588ed959328ead771ccc7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:00 +0800 Subject: iommu/vt-d: Add second level page table interface This adds the interfaces to setup or tear down the structures for second level page table translations. This includes types of second level only translation and pass through. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/intel-pasid.c | 280 ++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-pasid.h | 8 ++ include/linux/intel-iommu.h | 3 + 4 files changed, 292 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9818aaf2d0f7..f2976a3f1d67 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1210,7 +1210,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static void iommu_flush_write_buffer(struct intel_iommu *iommu) +void iommu_flush_write_buffer(struct intel_iommu *iommu) { u32 val; unsigned long flag; diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index fd3ccc0753b0..6d2b2e87e6fc 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "DMAR: " fmt +#include #include #include #include @@ -294,3 +295,282 @@ void intel_pasid_clear_entry(struct device *dev, int pasid) pasid_clear_entry(pe); } + +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) +{ + u64 old; + + old = READ_ONCE(*ptr); + WRITE_ONCE(*ptr, (old & ~mask) | bits); +} + +/* + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode + * PASID entry. + */ +static inline void +pasid_set_domain_id(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value); +} + +/* + * Get domain ID value of a scalable mode PASID entry. + */ +static inline u16 +pasid_get_domain_id(struct pasid_entry *pe) +{ + return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0)); +} + +/* + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_slptr(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value); +} + +/* + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID + * entry. + */ +static inline void +pasid_set_address_width(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2); +} + +/* + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_translation_type(struct pasid_entry *pe, u64 value) +{ + pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6); +} + +/* + * Enable fault processing by clearing the FPD(Fault Processing + * Disable) field (Bit 1) of a scalable mode PASID entry. + */ +static inline void pasid_set_fault_enable(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 1, 0); +} + +/* + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a + * scalable mode PASID entry. + */ +static inline void pasid_set_sre(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 0, 1); +} + +/* + * Setup the P(Present) field (Bit 0) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_present(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[0], 1 << 0, 1); +} + +/* + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID + * entry. + */ +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) +{ + pasid_set_bits(&pe->val[1], 1 << 23, value); +} + +static void +pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, + u16 did, int pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) +{ + struct qi_desc desc; + + desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.qw1 = 0; + desc.qw2 = 0; + desc.qw3 = 0; + + qi_submit_sync(&desc, iommu); +} + +static void +devtlb_invalidation_with_pasid(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct device_domain_info *info; + u16 sid, qdep, pfsid; + + info = dev->archdata.iommu; + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + pfsid = info->pfsid; + + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); +} + +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + u16 did; + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return; + + intel_pasid_clear_entry(dev, pasid); + did = pasid_get_domain_id(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + + /* Device IOTLB doesn't need to be flushed in caching mode. */ + if (!cap_caching_mode(iommu->cap)) + devtlb_invalidation_with_pasid(iommu, dev, pasid); +} + +/* + * Set up the scalable mode pasid entry for second only translation type. + */ +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + u64 pgd_val; + int agaw; + u16 did; + + /* + * If hardware advertises no support for second level + * translation, return directly. + */ + if (!ecap_slts(iommu->ecap)) { + pr_err("No second level translation support on %s\n", + iommu->name); + return -EINVAL; + } + + /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ + pgd = domain->pgd; + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; + } + } + + pgd_val = virt_to_phys(pgd); + did = domain->iommu_did[iommu->seq_id]; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_slptr(pte, pgd_val); + pasid_set_address_width(pte, agaw); + pasid_set_translation_type(pte, 2); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * Since it is a second level only translation setup, we should + * set SRE bit as well (addresses are expected to be GPAs). + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} + +/* + * Set up the scalable mode pasid entry for passthrough translation type. + */ +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid) +{ + u16 did = FLPT_DEFAULT_DID; + struct pasid_entry *pte; + + pte = intel_pasid_get_entry(dev, pasid); + if (!pte) { + dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid); + return -ENODEV; + } + + pasid_clear_entry(pte); + pasid_set_domain_id(pte, did); + pasid_set_address_width(pte, iommu->agaw); + pasid_set_translation_type(pte, 4); + pasid_set_fault_enable(pte); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + /* + * We should set SRE bit as well since the addresses are expected + * to be GPAs. + */ + pasid_set_sre(pte); + pasid_set_present(pte); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(pte, sizeof(*pte)); + + if (cap_caching_mode(iommu->cap)) { + pasid_cache_invalidation_with_pasid(iommu, did, pasid); + iotlb_invalidation_with_pasid(iommu, did, pasid); + } else { + iommu_flush_write_buffer(iommu); + } + + return 0; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 03c1612d173c..3c70522091d3 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -49,5 +49,13 @@ struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); void intel_pasid_clear_entry(struct device *dev, int pasid); +int intel_pasid_setup_second_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, int pasid); +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, + struct device *dev, int pasid); #endif /* __INTEL_PASID_H */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 08ff588a4df7..cb3ebda47fa7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -177,6 +177,8 @@ * Extended Capability Register */ +#define ecap_smpwc(e) (((e) >> 48) & 0x1) +#define ecap_slts(e) (((e) >> 46) & 0x1) #define ecap_smts(e) (((e) >> 43) & 0x1) #define ecap_dit(e) ((e >> 41) & 0x1) #define ecap_pasid(e) ((e >> 40) & 0x1) @@ -662,6 +664,7 @@ void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); int for_each_device_domain(int (*fn)(struct device_domain_info *info, void *data), void *data); +void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -- cgit v1.2.3 From ef848b7e5a6a0ef5b10640debce790e12717375b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:01 +0800 Subject: iommu/vt-d: Setup pasid entry for RID2PASID support when the scalable mode is enabled, there is no second level page translation pointer in the context entry any more (for DMA request without PASID). Instead, a new RID2PASID field is introduced in the context entry. Software can choose any PASID value to set RID2PASID and then setup the translation in the corresponding PASID entry. Upon receiving a DMA request without PASID, hardware will firstly look at this RID2PASID field and then treat this request as a request with a pasid value specified in RID2PASID field. Though software is allowed to use any PASID for the RID2PASID, we will always use the PASID 0 as a sort of design decision. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 20 ++++++++++++++++++++ drivers/iommu/intel-pasid.h | 1 + 2 files changed, 21 insertions(+) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f2976a3f1d67..620342bffc0b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2462,6 +2462,22 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dmar_remove_one_dev_info(domain, dev); return NULL; } + + /* Setup the PASID entry for requests without PASID: */ + spin_lock(&iommu->lock); + if (hw_pass_through && domain_type_is_si(domain)) + ret = intel_pasid_setup_pass_through(iommu, domain, + dev, PASID_RID2PASID); + else + ret = intel_pasid_setup_second_level(iommu, domain, + dev, PASID_RID2PASID); + spin_unlock(&iommu->lock); + if (ret) { + pr_err("Setup RID2PASID for %s failed\n", + dev_name(dev)); + dmar_remove_one_dev_info(domain, dev); + return NULL; + } } if (dev && domain_context_mapping(domain, dev)) { @@ -4825,6 +4841,10 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) iommu = info->iommu; if (info->dev) { + if (dev_is_pci(info->dev) && sm_supported(iommu)) + intel_pasid_tear_down_entry(iommu, info->dev, + PASID_RID2PASID); + iommu_disable_dev_iotlb(info); domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 3c70522091d3..d6f4fead4491 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -10,6 +10,7 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H +#define PASID_RID2PASID 0x0 #define PASID_MIN 0x1 #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F -- cgit v1.2.3 From ca6e322d60498087a160fad1c4ea8765ff2d21e7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:02 +0800 Subject: iommu/vt-d: Pass pasid table to context mapping So that the pasid related info, such as the pasid table and the maximum of pasid could be used during setting up scalable mode context. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 620342bffc0b..13c3c2dd0459 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1920,6 +1920,7 @@ static void domain_exit(struct dmar_domain *domain) static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, + struct pasid_table *table, u8 bus, u8 devfn) { u16 did = domain->iommu_did[iommu->seq_id]; @@ -2042,6 +2043,7 @@ out_unlock: struct domain_context_mapping_data { struct dmar_domain *domain; struct intel_iommu *iommu; + struct pasid_table *table; }; static int domain_context_mapping_cb(struct pci_dev *pdev, @@ -2050,25 +2052,31 @@ static int domain_context_mapping_cb(struct pci_dev *pdev, struct domain_context_mapping_data *data = opaque; return domain_context_mapping_one(data->domain, data->iommu, - PCI_BUS_NUM(alias), alias & 0xff); + data->table, PCI_BUS_NUM(alias), + alias & 0xff); } static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { + struct domain_context_mapping_data data; + struct pasid_table *table; struct intel_iommu *iommu; u8 bus, devfn; - struct domain_context_mapping_data data; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; + table = intel_pasid_get_table(dev); + if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, bus, devfn); + return domain_context_mapping_one(domain, iommu, table, + bus, devfn); data.domain = domain; data.iommu = iommu; + data.table = table; return pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_mapping_cb, &data); -- cgit v1.2.3 From 7373a8cc381978cfafa4b0285cdd935682f1b2d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:03 +0800 Subject: iommu/vt-d: Setup context and enable RID2PASID support This patch enables the translation for requests without PASID in the scalable mode by setting up the root and context entries. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 136 ++++++++++++++++++++++++++++++++++---------- drivers/iommu/intel-pasid.h | 1 + include/linux/intel-iommu.h | 1 + 3 files changed, 108 insertions(+), 30 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 13c3c2dd0459..21a6853290cc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1197,6 +1197,8 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); + if (sm_supported(iommu)) + addr |= DMA_RTADDR_SMT; raw_spin_lock_irqsave(&iommu->register_lock, flag); dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); @@ -1918,6 +1920,56 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } +/* + * Get the PASID directory size for scalable mode context entry. + * Value of X in the PDTS field of a scalable mode context entry + * indicates PASID directory with 2^(X + 7) entries. + */ +static inline unsigned long context_get_sm_pds(struct pasid_table *table) +{ + int pds, max_pde; + + max_pde = table->max_pasid >> PASID_PDE_SHIFT; + pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS); + if (pds < 7) + return 0; + + return pds - 7; +} + +/* + * Set the RID_PASID field of a scalable mode context entry. The + * IOMMU hardware will use the PASID value set in this field for + * DMA translations of DMA requests without PASID. + */ +static inline void +context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid) +{ + context->hi |= pasid & ((1 << 20) - 1); + context->hi |= (1 << 20); +} + +/* + * Set the DTE(Device-TLB Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_dte(struct context_entry *context) +{ + context->lo |= (1 << 2); +} + +/* + * Set the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_set_sm_pre(struct context_entry *context) +{ + context->lo |= (1 << 4); +} + +/* Convert value to context PASID directory size field coding. */ +#define context_pdts(pds) (((pds) & 0x7) << 9) + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, struct pasid_table *table, @@ -1928,8 +1980,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct device_domain_info *info = NULL; struct context_entry *context; unsigned long flags; - struct dma_pte *pgd; - int ret, agaw; + int ret; WARN_ON(did == 0); @@ -1975,41 +2026,67 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } } - pgd = domain->pgd; - context_clear_entry(context); - context_set_domain_id(context, did); - /* - * Skip top levels of page tables for iommu which has less agaw - * than default. Unnecessary for PT mode. - */ - if (translation != CONTEXT_TT_PASS_THROUGH) { - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } + if (sm_supported(iommu)) { + unsigned long pds; - info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; + WARN_ON(!table); + + /* Setup the PASID DIR pointer: */ + pds = context_get_sm_pds(table); + context->lo = (u64)virt_to_phys(table->table) | + context_pdts(pds); + + /* Setup the RID_PASID field: */ + context_set_sm_rid2pasid(context, PASID_RID2PASID); - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. + * Setup the Device-TLB enable bit and Page request + * Enable bit: */ - context_set_address_width(context, iommu->msagaw); + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + context_set_sm_dte(context); + if (info && info->pri_supported) + context_set_sm_pre(context); + } else { + struct dma_pte *pgd = domain->pgd; + int agaw; + + context_set_domain_id(context, did); + context_set_translation_type(context, translation); + + if (translation != CONTEXT_TT_PASS_THROUGH) { + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. + */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; + } + + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); + } else { + /* + * In pass through mode, AW must be programmed to + * indicate the largest AGAW value supported by + * hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); + } } - context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -5180,7 +5257,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -#define MAX_NR_PASID_BITS (20) static inline unsigned long intel_iommu_get_pts(struct device *dev) { int pts, max_pasid; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index d6f4fead4491..55bb8715329d 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -17,6 +17,7 @@ #define PASID_PTE_PRESENT 1 #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 +#define MAX_NR_PASID_BITS 20 /* * Domain ID reserved for pasid entries programmed for first-level diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index cb3ebda47fa7..5fdd33ed2cce 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -258,6 +258,7 @@ /* DMA_RTADDR_REG */ #define DMA_RTADDR_RTT (((u64)1) << 11) +#define DMA_RTADDR_SMT (((u64)1) << 10) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) -- cgit v1.2.3 From 1c4f88b7f1f9298b56c7dac18c0bcd8d2f75059a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:05 +0800 Subject: iommu/vt-d: Shared virtual address in scalable mode This patch enables the current SVA (Shared Virtual Address) implementation to work in the scalable mode. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Signed-off-by: Sanjay Kumar Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 38 ----------------------------- drivers/iommu/intel-pasid.c | 2 +- drivers/iommu/intel-pasid.h | 1 - drivers/iommu/intel-svm.c | 58 ++++++++++++++------------------------------- include/linux/intel-iommu.h | 9 +------ 5 files changed, 20 insertions(+), 88 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 21a6853290cc..cec88df671a6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5257,18 +5257,6 @@ static void intel_iommu_put_resv_regions(struct device *dev, } #ifdef CONFIG_INTEL_IOMMU_SVM -static inline unsigned long intel_iommu_get_pts(struct device *dev) -{ - int pts, max_pasid; - - max_pasid = intel_pasid_get_dev_max_id(dev); - pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS); - if (pts < 5) - return 0; - - return pts - 5; -} - int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) { struct device_domain_info *info; @@ -5300,33 +5288,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { - if (iommu->pasid_state_table) - context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); - context[1].lo = (u64)virt_to_phys(info->pasid_table->table) | - intel_iommu_get_pts(sdev->dev); - - wmb(); - /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both - * extended to permit requests-with-PASID if the PASIDE bit - * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, - * however, the PASIDE bit is ignored and requests-with-PASID - * are unconditionally blocked. Which makes less sense. - * So convert from CONTEXT_TT_PASS_THROUGH to one of the new - * "guest mode" translation types depending on whether ATS - * is available or not. Annoyingly, we can't use the new - * modes *unless* PASIDE is set. */ - if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { - ctx_lo &= ~CONTEXT_TT_MASK; - if (info->ats_supported) - ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; - else - ctx_lo |= CONTEXT_TT_PT_PASID << 2; - } ctx_lo |= CONTEXT_PASIDE; - if (iommu->pasid_state_table) - ctx_lo |= CONTEXT_DINVE; - if (info->pri_supported) - ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index c3dcf4dc2496..53fe5248d8f1 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -286,7 +286,7 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -void intel_pasid_clear_entry(struct device *dev, int pasid) +static void intel_pasid_clear_entry(struct device *dev, int pasid) { struct pasid_entry *pe; diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 512c63ec8a22..23537b3f34e3 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -58,7 +58,6 @@ void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); int intel_pasid_get_dev_max_id(struct device *dev); struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid); -void intel_pasid_clear_entry(struct device *dev, int pasid); int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, pgd_t *pgd, int pasid, u16 did, int flags); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index b7f1d12e24b0..04d6bdb51404 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -29,10 +29,6 @@ #include "intel-pasid.h" -#define PASID_ENTRY_P BIT_ULL(0) -#define PASID_ENTRY_FLPM_5LP BIT_ULL(9) -#define PASID_ENTRY_SRE BIT_ULL(11) - static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_state_entry { @@ -248,20 +244,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); } - -static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid) -{ - struct qi_desc desc; - - desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) | - QI_PC_PASID_SEL | QI_PC_PASID(pasid); - desc.qw1 = 0; - desc.qw2 = 0; - desc.qw3 = 0; - - qi_submit_sync(&desc, svm->iommu); -} - static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); @@ -281,8 +263,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_clear_entry(sdev->dev, svm->pasid); - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); } rcu_read_unlock(); @@ -301,11 +282,9 @@ static LIST_HEAD(global_svm_list); int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); - struct pasid_entry *entry; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; - u64 pasid_entry_val; int pasid_max; int ret; @@ -414,22 +393,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ kfree(sdev); goto out; } - pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; - } else - pasid_entry_val = (u64)__pa(init_mm.pgd) | - PASID_ENTRY_P | PASID_ENTRY_SRE; - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_entry_val |= PASID_ENTRY_FLPM_5LP; - - entry = intel_pasid_get_entry(dev, svm->pasid); - WRITE_ONCE(entry->val[0], pasid_entry_val); - - /* - * Flush PASID cache when a PASID table entry becomes - * present. - */ - if (cap_caching_mode(iommu->cap)) - intel_flush_pasid_dev(svm, sdev, svm->pasid); + } + + spin_lock(&iommu->lock); + ret = intel_pasid_setup_first_level(iommu, dev, + mm ? mm->pgd : init_mm.pgd, + svm->pasid, FLPT_DEFAULT_DID, + mm ? 0 : PASID_FLAG_SUPERVISOR_MODE); + spin_unlock(&iommu->lock); + if (ret) { + if (mm) + mmu_notifier_unregister(&svm->notifier, mm); + intel_pasid_free_id(svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } list_add_tail(&svm->list, &global_svm_list); } @@ -475,10 +454,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_pasid_tear_down_entry(iommu, dev, svm->pasid); intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); - intel_pasid_clear_entry(dev, svm->pasid); if (list_empty(&svm->devs)) { intel_pasid_free_id(svm->pasid); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4ad62396e81e..cfcf9c1e1872 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -54,14 +54,7 @@ #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 -/* Extended context entry types */ -#define CONTEXT_TT_PT_PASID 4 -#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 -#define CONTEXT_TT_MASK (7ULL << 2) - -#define CONTEXT_DINVE (1ULL << 8) -#define CONTEXT_PRS (1ULL << 9) -#define CONTEXT_PASIDE (1ULL << 11) +#define CONTEXT_PASIDE BIT_ULL(3) /* * Intel IOMMU register specification per version 1.0 public spec. -- cgit v1.2.3 From 6d68b88e0993d67e9ebb1240f84240b712fbc8a4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Dec 2018 09:59:06 +0800 Subject: iommu/vt-d: Remove deferred invalidation Deferred invalidation is an ECS specific feature. It will not be supported when IOMMU works in scalable mode. As we deprecated the ECS support, remove deferred invalidation and cleanup the code. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Liu Yi L Cc: Sanjay Kumar Signed-off-by: Lu Baolu Reviewed-by: Ashok Raj Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 - drivers/iommu/intel-svm.c | 45 --------------------------------------------- include/linux/intel-iommu.h | 8 -------- 3 files changed, 54 deletions(-) (limited to 'drivers/iommu/intel-iommu.c') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cec88df671a6..9043e1e9b2be 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1700,7 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (pasid_supported(iommu)) { if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); - intel_svm_exit(iommu); } #endif } diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 04d6bdb51404..5b2e3b2d593b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -31,15 +31,8 @@ static irqreturn_t prq_event_thread(int irq, void *d); -struct pasid_state_entry { - u64 val; -}; - int intel_svm_init(struct intel_iommu *iommu) { - struct page *pages; - int order; - if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && !cap_fl1gp_support(iommu->cap)) return -EINVAL; @@ -48,39 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu) !cap_5lp_support(iommu->cap)) return -EINVAL; - /* Start at 2 because it's defined as 2^(1+PSS) */ - iommu->pasid_max = 2 << ecap_pss(iommu->ecap); - - /* Eventually I'm promised we will get a multi-level PASID table - * and it won't have to be physically contiguous. Until then, - * limit the size because 8MiB contiguous allocations can be hard - * to come by. The limit of 0x20000, which is 1MiB for each of - * the PASID and PASID-state tables, is somewhat arbitrary. */ - if (iommu->pasid_max > 0x20000) - iommu->pasid_max = 0x20000; - - order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - if (ecap_dis(iommu->ecap)) { - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); - if (pages) - iommu->pasid_state_table = page_address(pages); - else - pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", - iommu->name); - } - - return 0; -} - -int intel_svm_exit(struct intel_iommu *iommu) -{ - int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max); - - if (iommu->pasid_state_table) { - free_pages((unsigned long)iommu->pasid_state_table, order); - iommu->pasid_state_table = NULL; - } - return 0; } @@ -214,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; - /* Try deferred invalidate if available */ - if (svm->iommu->pasid_state_table && - !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) - return; - rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index cfcf9c1e1872..0605f3bf6e79 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -541,15 +541,8 @@ struct intel_iommu { struct iommu_flush flush; #endif #ifdef CONFIG_INTEL_IOMMU_SVM - /* These are large and need to be contiguous, so we allocate just - * one for now. We'll maybe want to rethink that if we truly give - * devices away to userspace processes (e.g. for DPDK) and don't - * want to trust that userspace will use *only* the PASID it was - * told to. But while it's all driver-arbitrated, we're fine. */ - struct pasid_state_entry *pasid_state_table; struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ - u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -663,7 +656,6 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_SVM int intel_svm_init(struct intel_iommu *iommu); -int intel_svm_exit(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -- cgit v1.2.3