diff options
author | Alexey Kardashevskiy <aik@ozlabs.ru> | 2015-06-05 09:35:15 +0300 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2015-06-11 08:16:49 +0300 |
commit | 05c6cfb9dce0d13d37e9d007ee6a4af36f1c0a58 (patch) | |
tree | 8bd7df6fe974bb59d2860a1a6725bd44779edaa7 /arch/powerpc/kernel/iommu.c | |
parent | c5bb44edee19b2c19221a0b5a68add37ea5733c5 (diff) | |
download | linux-05c6cfb9dce0d13d37e9d007ee6a4af36f1c0a58.tar.xz |
powerpc/iommu/powernv: Release replaced TCE
At the moment, writing a new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However, the PAPR specification allows
the guest to write a new TCE value without clearing it first.
Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.
This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.
This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.
This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().
This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.
This moves SetPageDirty() to the IOMMU code to make it work for both
the VFIO ioctl interface and in-kernel TCE acceleration (when it becomes
available later).
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/iommu.c')
-rw-r--r-- | arch/powerpc/kernel/iommu.c | 59 |
1 files changed, 19 insertions, 40 deletions
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0fb88005c3c5..a8e3490b54e3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -965,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); int iommu_tce_put_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce) { - if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) - return -EINVAL; - - if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; if (ioba & ~IOMMU_PAGE_MASK(tbl)) @@ -985,44 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); -unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) { - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); + long ret; - spin_lock(&(pool->lock)); + ret = tbl->it_ops->exchange(tbl, entry, hpa, direction); - oldtce = tbl->it_ops->get(tbl, entry); - if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) - tbl->it_ops->clear(tbl, entry, 1); - else - oldtce = 0; - - spin_unlock(&(pool->lock)); - - return oldtce; -} -EXPORT_SYMBOL_GPL(iommu_clear_tce); - -/* - * hwaddr is a kernel virtual address here (0xc... bazillion), - * tce_build converts it to a physical address. 
- */ -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, - unsigned long hwaddr, enum dma_data_direction direction) -{ - int ret = -EBUSY; - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); - - spin_lock(&(pool->lock)); - - oldtce = tbl->it_ops->get(tbl, entry); - /* Add new entry if it is not busy */ - if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) - ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL); - - spin_unlock(&(pool->lock)); + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) + SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT)); /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", @@ -1031,13 +1000,23 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, return ret; } -EXPORT_SYMBOL_GPL(iommu_tce_build); +EXPORT_SYMBOL_GPL(iommu_tce_xchg); int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; int ret = 0; + /* + * VFIO does not control TCE entries allocation and the guest + * can write new TCEs on top of existing ones so iommu_tce_build() + * must be able to release old pages. This functionality + * requires exchange() callback defined so if it is not + * implemented, we disallow taking ownership over the table. + */ + if (!tbl->it_ops->exchange) + return -EINVAL; + spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) spin_lock(&tbl->pools[i].lock); |