From 5ffd229c02731a91d08ca21e76b503c5bbb5c095 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 21 May 2013 13:33:10 +1000 Subject: powerpc/vfio: Implement IOMMU driver for VFIO VFIO implements platform independent stuff such as a PCI driver, BAR access (via read/write on a file descriptor or direct mapping when possible) and IRQ signaling. The platform dependent part includes IOMMU initialization and handling. This implements an IOMMU driver for VFIO which does mapping/unmapping pages for the guest IO and provides information about DMA window (required by a POWER guest). Cc: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- drivers/vfio/Kconfig | 6 + drivers/vfio/Makefile | 1 + drivers/vfio/vfio.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 377 ++++++++++++++++++++++++++++++++++++ 4 files changed, 385 insertions(+) create mode 100644 drivers/vfio/vfio_iommu_spapr_tce.c (limited to 'drivers/vfio') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 7cd5dec0abd1..b464687f6e14 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1 depends on VFIO default n +config VFIO_IOMMU_SPAPR_TCE + tristate + depends on VFIO && SPAPR_TCE_IOMMU + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 + select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV help VFIO provides a framework for secure userspace device drivers. See Documentation/vfio.txt for more details. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 2398d4a0e38b..72bfabc8629e 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6d78736563de..259ad282ae5d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1415,6 +1415,7 @@ static int __init vfio_init(void) * drivers. */ request_module_nowait("vfio_iommu_type1"); + request_module_nowait("vfio_iommu_spapr_tce"); return 0; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c new file mode 100644 index 000000000000..bdae7a04af75 --- /dev/null +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -0,0 +1,377 @@ +/* + * VFIO: IOMMU DMA mapping support for TCE on POWER + * + * Copyright (C) 2013 IBM Corp. All rights reserved. + * Author: Alexey Kardashevskiy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio_iommu_type1.c: + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "aik@ozlabs.ru" +#define DRIVER_DESC "VFIO IOMMU SPAPR TCE" + +static void tce_iommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group); + +/* + * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation + * + * This code handles mapping and unmapping of user data buffers + * into DMA'ble space using the IOMMU + */ + +/* + * The container descriptor supports only a single group per container. + * Required by the API as the container is not supplied with the IOMMU group + * at the moment of initialization. + */ +struct tce_container { + struct mutex lock; + struct iommu_table *tbl; + bool enabled; +}; + +static int tce_iommu_enable(struct tce_container *container) +{ + int ret = 0; + unsigned long locked, lock_limit, npages; + struct iommu_table *tbl = container->tbl; + + if (!container->tbl) + return -ENXIO; + + if (!current->mm) + return -ESRCH; /* process exited */ + + if (container->enabled) + return -EBUSY; + + /* + * When userspace pages are mapped into the IOMMU, they are effectively + * locked memory, so, theoretically, we need to update the accounting + * of locked pages on each map and unmap. For powerpc, the map unmap + * paths can be very hot, though, and the accounting would kill + * performance, especially since it would be difficult to impossible + * to handle the accounting in real mode only. + * + * To address that, rather than precisely accounting every page, we + * instead account for a worst case on locked memory when the iommu is + * enabled and disabled. The worst case upper bound on locked memory + * is the size of the whole iommu window, which is usually relatively + * small (compared to total memory sizes) on POWER hardware. + * + * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, + * that would effectively kill the guest at random points, much better + * enforcing the limit based on the max that the guest can map. + */ + down_write(¤t->mm->mmap_sem); + npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + locked = current->mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { + pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", + rlimit(RLIMIT_MEMLOCK)); + ret = -ENOMEM; + } else { + + current->mm->locked_vm += npages; + container->enabled = true; + } + up_write(¤t->mm->mmap_sem); + + return ret; +} + +static void tce_iommu_disable(struct tce_container *container) +{ + if (!container->enabled) + return; + + container->enabled = false; + + if (!container->tbl || !current->mm) + return; + + down_write(¤t->mm->mmap_sem); + current->mm->locked_vm -= (container->tbl->it_size << + IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + up_write(¤t->mm->mmap_sem); +} + +static void *tce_iommu_open(unsigned long arg) +{ + struct tce_container *container; + + if (arg != VFIO_SPAPR_TCE_IOMMU) { + pr_err("tce_vfio: Wrong IOMMU type\n"); + return ERR_PTR(-EINVAL); + } + + container = kzalloc(sizeof(*container), GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + mutex_init(&container->lock); + + return container; +} + +static void tce_iommu_release(void *iommu_data) +{ + struct tce_container *container = iommu_data; + + WARN_ON(container->tbl && !container->tbl->it_group); + tce_iommu_disable(container); + + if (container->tbl && container->tbl->it_group) + tce_iommu_detach_group(iommu_data, container->tbl->it_group); + + mutex_destroy(&container->lock); + + kfree(container); +} + +static long tce_iommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + struct tce_container *container = iommu_data; + unsigned long minsz; + long ret; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; + + case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { + struct vfio_iommu_spapr_tce_info info; + struct iommu_table *tbl = container->tbl; + + if (WARN_ON(!tbl)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_info, + dma32_window_size); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; + info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; + info.flags = 0; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + case VFIO_IOMMU_MAP_DMA: { + struct vfio_iommu_type1_dma_map param; + struct iommu_table *tbl = container->tbl; + unsigned long tce, i; + + if (!tbl) + return -ENXIO; + + BUG_ON(!tbl->it_group); + + minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE)) + return -EINVAL; + + if ((param.size & ~IOMMU_PAGE_MASK) || + (param.vaddr & ~IOMMU_PAGE_MASK)) + return -EINVAL; + + /* iova is checked by the IOMMU API */ + tce = param.vaddr; + if (param.flags & VFIO_DMA_MAP_FLAG_READ) + tce |= TCE_PCI_READ; + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + tce |= TCE_PCI_WRITE; + + ret = iommu_tce_put_param_check(tbl, param.iova, tce); + if (ret) + return ret; + + for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { + ret = iommu_put_tce_user_mode(tbl, + (param.iova >> IOMMU_PAGE_SHIFT) + i, + tce); + if (ret) + break; + tce += IOMMU_PAGE_SIZE; + } + if (ret) + iommu_clear_tces_and_put_pages(tbl, + param.iova >> IOMMU_PAGE_SHIFT, i); + + iommu_flush_tce(tbl); + + return ret; + } + case VFIO_IOMMU_UNMAP_DMA: { + struct vfio_iommu_type1_dma_unmap param; + struct iommu_table *tbl = container->tbl; + + if (WARN_ON(!tbl)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + if (param.size & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ret = iommu_tce_clear_param_check(tbl, param.iova, 0, + param.size >> IOMMU_PAGE_SHIFT); + if (ret) + return ret; + + ret = iommu_clear_tces_and_put_pages(tbl, + param.iova >> IOMMU_PAGE_SHIFT, + param.size >> IOMMU_PAGE_SHIFT); + iommu_flush_tce(tbl); + + return ret; + } + case VFIO_IOMMU_ENABLE: + mutex_lock(&container->lock); + ret = tce_iommu_enable(container); + mutex_unlock(&container->lock); + return ret; + + + case VFIO_IOMMU_DISABLE: + mutex_lock(&container->lock); + tce_iommu_disable(container); + mutex_unlock(&container->lock); + return 0; + } + + return -ENOTTY; +} + +static int tce_iommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + int ret; + struct tce_container *container = iommu_data; + struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + + BUG_ON(!tbl); + mutex_lock(&container->lock); + + /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", + iommu_group_id(iommu_group), iommu_group); */ + if (container->tbl) { + pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", + iommu_group_id(container->tbl->it_group), + iommu_group_id(iommu_group)); + ret = -EBUSY; + } else if (container->enabled) { + pr_err("tce_vfio: attaching group #%u to enabled container\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + } else { + ret = iommu_take_ownership(tbl); + if (!ret) + container->tbl = tbl; + } + + mutex_unlock(&container->lock); + + return ret; +} + +static void tce_iommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + struct tce_container *container = iommu_data; + struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + + BUG_ON(!tbl); + mutex_lock(&container->lock); + if (tbl != container->tbl) { + pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", + iommu_group_id(iommu_group), + iommu_group_id(tbl->it_group)); + } else { + if (container->enabled) { + pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", + iommu_group_id(tbl->it_group)); + tce_iommu_disable(container); + } + + /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", + iommu_group_id(iommu_group), iommu_group); */ + container->tbl = NULL; + iommu_release_ownership(tbl); + } + mutex_unlock(&container->lock); +} + +const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { + .name = "iommu-vfio-powerpc", + .owner = THIS_MODULE, + .open = tce_iommu_open, + .release = tce_iommu_release, + .ioctl = tce_iommu_ioctl, + .attach_group = tce_iommu_attach_group, + .detach_group = tce_iommu_detach_group, +}; + +static int __init tce_iommu_init(void) +{ + return vfio_register_iommu_driver(&tce_iommu_driver_ops); +} + +static void __exit tce_iommu_cleanup(void) +{ + vfio_unregister_iommu_driver(&tce_iommu_driver_ops); +} + +module_init(tce_iommu_init); +module_exit(tce_iommu_cleanup); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); + -- cgit v1.2.3 From 5b25199eff8e124297e6e95392f1719d20daca89 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 21 May 2013 13:33:11 +1000 Subject: powerpc/vfio: Enable on pSeries platform The enables VFIO on the pSeries platform, enabling user space programs to access PCI devices directly. Signed-off-by: Alexey Kardashevskiy Cc: David Gibson Signed-off-by: Paul Mackerras Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/iommu.c | 4 ++++ drivers/iommu/Kconfig | 2 +- drivers/vfio/Kconfig | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/vfio') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 86ae364900d6..23fc1dcf4434 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci->phb->node); iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->iommu_table); } } @@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) phb->node); iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); + iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); return; } @@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); pr_debug(" created table: %p\n", pci->iommu_table); } else { pr_debug(" found DMA window, table: %p\n", pci->iommu_table); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3f3abde8a7f9..01730b2b9954 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -263,7 +263,7 @@ config SHMOBILE_IOMMU_L1SIZE config SPAPR_TCE_IOMMU bool "sPAPR TCE IOMMU Support" - depends on PPC_POWERNV + depends on PPC_POWERNV || PPC_PSERIES select IOMMU_API help Enables bits of IOMMU API required by VFIO. The iommu_ops diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index b464687f6e14..26b3d9d1409f 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -12,7 +12,7 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 - select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV + select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) help VFIO provides a framework for secure userspace device drivers. See Documentation/vfio.txt for more details. -- cgit v1.2.3