Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/Kconfig            |  20
-rw-r--r--  drivers/xen/Makefile           |   2
-rw-r--r--  drivers/xen/features.c         |   2
-rw-r--r--  drivers/xen/gntdev-common.h    |   7
-rw-r--r--  drivers/xen/gntdev.c           | 157
-rw-r--r--  drivers/xen/grant-dma-iommu.c  |  78
-rw-r--r--  drivers/xen/grant-dma-ops.c    | 346
-rw-r--r--  drivers/xen/grant-table.c      | 251
-rw-r--r--  drivers/xen/xlate_mmu.c        |   1
9 files changed, 775 insertions(+), 89 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 120d32f164ac..bfd5f4f706bc 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -335,4 +335,24 @@ config XEN_UNPOPULATED_ALLOC
having to balloon out RAM regions in order to obtain physical memory
space to create such mappings.
+config XEN_GRANT_DMA_IOMMU
+ bool
+ select IOMMU_API
+
+config XEN_GRANT_DMA_OPS
+ bool
+ select DMA_OPS
+
+config XEN_VIRTIO
+ bool "Xen virtio support"
+ depends on VIRTIO
+ select XEN_GRANT_DMA_OPS
+ select XEN_GRANT_DMA_IOMMU if OF
+ help
+ Enable virtio support for running as a Xen guest. Depending on the
+ guest type, this will require special support on the backend side
+ (qemu or kernel, depending on the virtio device types used).
+
+ If in doubt, say n.
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5aae66e638a7..c0503f1c7d5b 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -39,3 +39,5 @@ xen-gntalloc-y := gntalloc.o
xen-privcmd-y := privcmd.o privcmd-buf.o
obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o
obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o
+obj-$(CONFIG_XEN_GRANT_DMA_OPS) += grant-dma-ops.o
+obj-$(CONFIG_XEN_GRANT_DMA_IOMMU) += grant-dma-iommu.o
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 7b591443833c..87f1828d40d5 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -42,7 +42,7 @@ void xen_setup_features(void)
if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
break;
for (j = 0; j < 32; j++)
- xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
+ xen_features[i * 32 + j] = !!(fi.submap & 1U << j);
}
if (xen_pv_domain()) {
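
The one-character change above is an undefined-behaviour fix: for j == 31,
1 << j shifts a signed int into its sign bit, whereas 1U << j is well defined
for all j in 0..31. A minimal standalone C illustration (not part of the
patch) follows.

#include <stdio.h>

int main(void)
{
	unsigned int j = 31;
	unsigned int bit = 1U << j;	/* well defined, yields 0x80000000 */

	/* With a signed literal, 1 << 31 would be undefined behaviour. */
	printf("bit = 0x%x\n", bit);
	return 0;
}
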
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 20d7d059dadb..40ef379c28ab 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -16,6 +16,7 @@
#include <linux/mmu_notifier.h>
#include <linux/types.h>
#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -56,6 +57,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -73,6 +75,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 59ffea800079..4b56c39f766d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
"Maximum number of grants that may be mapped by one mapping request");
+/* True in PV mode, false otherwise */
static int use_ptemod;
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kvfree(map->unmap_ops);
kvfree(map->kmap_ops);
kvfree(map->kunmap_ops);
+ kvfree(map->being_removed);
kfree(map);
}
@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
if (use_ptemod) {
add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
if (!refcount_dec_and_test(&map->users))
return;
- if (map->pages && !use_ptemod)
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
unmap_grant_pages(map, 0, map->count);
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
-
- if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- int pgno = (map->notify.addr >> PAGE_SHIFT);
- if (pgno >= offset && pgno < offset + pages) {
- /* No need for kmap, pages are in lowmem */
- uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
- tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
- map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
- }
- }
-
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
-
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
- if (map->kunmap_ops[offset+i].status)
- err = -EINVAL;
+ WARN_ON(map->kunmap_ops[offset+i].status);
pr_debug("kunmap handle=%u st=%d\n",
map->kunmap_ops[offset+i].handle,
map->kunmap_ops[offset+i].status);
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
- return err;
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gntdev_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
+
+ if (pgno >= offset && pgno < offset + pages) {
+ /* No need for kmap, pages are in lowmem */
+ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
+
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages &&
- map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset + range].handle ==
- INVALID_GRANT_HANDLE)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
- int err;
if (!mmu_notifier_range_blockable(range))
return false;
@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
range->start, range->end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return true;
}
@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
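
The gntdev changes above replace the blocking gnttab_unmap_refs_sync() call
with gnttab_unmap_refs_async() plus a completion callback, with live_grants
and being_removed tracking which grants are still in flight. A minimal sketch
of that async-unmap pattern (not part of the patch; all example_* names are
hypothetical):

#include <linux/printk.h>
#include <xen/grant_table.h>

static void example_unmap_done(int result, struct gntab_unmap_queue_data *data)
{
	/* Runs once the hypervisor has released every page in the batch. */
	pr_debug("unmapped %u grants, result %d\n", data->count, result);
}

static void example_unmap(struct gntab_unmap_queue_data *unmap_data,
			  struct gnttab_unmap_grant_ref *unmap_ops,
			  struct page **pages, unsigned int count)
{
	unmap_data->unmap_ops = unmap_ops;
	unmap_data->kunmap_ops = NULL;		/* no kernel mapping to undo */
	unmap_data->pages = pages;
	unmap_data->count = count;
	unmap_data->done = example_unmap_done;
	unmap_data->data = unmap_data;		/* cookie passed to ->done */

	gnttab_unmap_refs_async(unmap_data);	/* returns before unmap completes */
}
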
diff --git a/drivers/xen/grant-dma-iommu.c b/drivers/xen/grant-dma-iommu.c
new file mode 100644
index 000000000000..16b8bc0c0b33
--- /dev/null
+++ b/drivers/xen/grant-dma-iommu.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stub IOMMU driver which does nothing.
+ * Its main purpose is to let the Xen grant DMA-mapping layer reuse the
+ * generic IOMMU device tree bindings.
+ *
+ * Copyright (C) 2022 EPAM Systems Inc.
+ */
+
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+struct grant_dma_iommu_device {
+ struct device *dev;
+ struct iommu_device iommu;
+};
+
+/* Nothing is really needed here */
+static const struct iommu_ops grant_dma_iommu_ops;
+
+static const struct of_device_id grant_dma_iommu_of_match[] = {
+ { .compatible = "xen,grant-dma" },
+ { },
+};
+
+static int grant_dma_iommu_probe(struct platform_device *pdev)
+{
+ struct grant_dma_iommu_device *mmu;
+ int ret;
+
+ mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
+ if (!mmu)
+ return -ENOMEM;
+
+ mmu->dev = &pdev->dev;
+
+ ret = iommu_device_register(&mmu->iommu, &grant_dma_iommu_ops, &pdev->dev);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, mmu);
+
+ return 0;
+}
+
+static int grant_dma_iommu_remove(struct platform_device *pdev)
+{
+ struct grant_dma_iommu_device *mmu = platform_get_drvdata(pdev);
+
+ platform_set_drvdata(pdev, NULL);
+ iommu_device_unregister(&mmu->iommu);
+
+ return 0;
+}
+
+static struct platform_driver grant_dma_iommu_driver = {
+ .driver = {
+ .name = "grant-dma-iommu",
+ .of_match_table = grant_dma_iommu_of_match,
+ },
+ .probe = grant_dma_iommu_probe,
+ .remove = grant_dma_iommu_remove,
+};
+
+static int __init grant_dma_iommu_init(void)
+{
+ struct device_node *iommu_np;
+
+ iommu_np = of_find_matching_node(NULL, grant_dma_iommu_of_match);
+ if (!iommu_np)
+ return 0;
+
+ of_node_put(iommu_np);
+
+ return platform_driver_register(&grant_dma_iommu_driver);
+}
+subsys_initcall(grant_dma_iommu_init);
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
new file mode 100644
index 000000000000..fc0142484001
--- /dev/null
+++ b/drivers/xen/grant-dma-ops.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Xen grant DMA-mapping layer - contains special DMA-mapping routines
+ * for providing grant references as DMA addresses to be used by frontends
+ * (e.g. virtio) in Xen guests
+ *
+ * Copyright (c) 2021, Juergen Gross <jgross@suse.com>
+ */
+
+#include <linux/module.h>
+#include <linux/dma-map-ops.h>
+#include <linux/of.h>
+#include <linux/pfn.h>
+#include <linux/xarray.h>
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/grant_table.h>
+
+struct xen_grant_dma_data {
+ /* The ID of backend domain */
+ domid_t backend_domid;
+ /* Is device behaving sane? */
+ bool broken;
+};
+
+static DEFINE_XARRAY(xen_grant_dma_devices);
+
+#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63)
+
+static inline dma_addr_t grant_to_dma(grant_ref_t grant)
+{
+ return XEN_GRANT_DMA_ADDR_OFF | ((dma_addr_t)grant << PAGE_SHIFT);
+}
+
+static inline grant_ref_t dma_to_grant(dma_addr_t dma)
+{
+ return (grant_ref_t)((dma & ~XEN_GRANT_DMA_ADDR_OFF) >> PAGE_SHIFT);
+}
+
+static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+
+ xa_lock(&xen_grant_dma_devices);
+ data = xa_load(&xen_grant_dma_devices, (unsigned long)dev);
+ xa_unlock(&xen_grant_dma_devices);
+
+ return data;
+}
+
+/*
+ * DMA ops for Xen frontends (e.g. virtio).
+ *
+ * Used to act as a kind of software IOMMU for Xen guests by using grants as
+ * DMA addresses.
+ * Such a DMA address is formed by using the grant reference as a frame
+ * number and setting the highest address bit (this bit is for the backend
+ * to be able to distinguish it from e.g. an MMIO address).
+ */
+static void *xen_grant_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ unsigned long pfn;
+ grant_ref_t grant;
+ void *ret;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return NULL;
+
+ if (unlikely(data->broken))
+ return NULL;
+
+ ret = alloc_pages_exact(n_pages * PAGE_SIZE, gfp);
+ if (!ret)
+ return NULL;
+
+ pfn = virt_to_pfn(ret);
+
+ if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) {
+ free_pages_exact(ret, n_pages * PAGE_SIZE);
+ return NULL;
+ }
+
+ for (i = 0; i < n_pages; i++) {
+ gnttab_grant_foreign_access_ref(grant + i, data->backend_domid,
+ pfn_to_gfn(pfn + i), 0);
+ }
+
+ *dma_handle = grant_to_dma(grant);
+
+ return ret;
+}
+
+static void xen_grant_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return;
+
+ if (unlikely(data->broken))
+ return;
+
+ grant = dma_to_grant(dma_handle);
+
+ for (i = 0; i < n_pages; i++) {
+ if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) {
+ dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n");
+ data->broken = true;
+ return;
+ }
+ }
+
+ gnttab_free_grant_reference_seq(grant, n_pages);
+
+ free_pages_exact(vaddr, n_pages * PAGE_SIZE);
+}
+
+static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle,
+ enum dma_data_direction dir,
+ gfp_t gfp)
+{
+ void *vaddr;
+
+ vaddr = xen_grant_dma_alloc(dev, size, dma_handle, gfp, 0);
+ if (!vaddr)
+ return NULL;
+
+ return virt_to_page(vaddr);
+}
+
+static void xen_grant_dma_free_pages(struct device *dev, size_t size,
+ struct page *vaddr, dma_addr_t dma_handle,
+ enum dma_data_direction dir)
+{
+ xen_grant_dma_free(dev, size, page_to_virt(vaddr), dma_handle, 0);
+}
+
+static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+ dma_addr_t dma_handle;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return DMA_MAPPING_ERROR;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return DMA_MAPPING_ERROR;
+
+ if (unlikely(data->broken))
+ return DMA_MAPPING_ERROR;
+
+ if (gnttab_alloc_grant_reference_seq(n_pages, &grant))
+ return DMA_MAPPING_ERROR;
+
+ for (i = 0; i < n_pages; i++) {
+ gnttab_grant_foreign_access_ref(grant + i, data->backend_domid,
+ xen_page_to_gfn(page) + i, dir == DMA_TO_DEVICE);
+ }
+
+ dma_handle = grant_to_dma(grant) + offset;
+
+ return dma_handle;
+}
+
+static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return;
+
+ if (unlikely(data->broken))
+ return;
+
+ grant = dma_to_grant(dma_handle);
+
+ for (i = 0; i < n_pages; i++) {
+ if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) {
+ dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n");
+ data->broken = true;
+ return;
+ }
+ }
+
+ gnttab_free_grant_reference_seq(grant, n_pages);
+}
+
+static void xen_grant_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s;
+ unsigned int i;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return;
+
+ for_each_sg(sg, s, nents, i)
+ xen_grant_dma_unmap_page(dev, s->dma_address, sg_dma_len(s), dir,
+ attrs);
+}
+
+static int xen_grant_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s;
+ unsigned int i;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return -EINVAL;
+
+ for_each_sg(sg, s, nents, i) {
+ s->dma_address = xen_grant_dma_map_page(dev, sg_page(s), s->offset,
+ s->length, dir, attrs);
+ if (s->dma_address == DMA_MAPPING_ERROR)
+ goto out;
+
+ sg_dma_len(s) = s->length;
+ }
+
+ return nents;
+
+out:
+ xen_grant_dma_unmap_sg(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ sg_dma_len(sg) = 0;
+
+ return -EIO;
+}
+
+static int xen_grant_dma_supported(struct device *dev, u64 mask)
+{
+ return mask == DMA_BIT_MASK(64);
+}
+
+static const struct dma_map_ops xen_grant_dma_ops = {
+ .alloc = xen_grant_dma_alloc,
+ .free = xen_grant_dma_free,
+ .alloc_pages = xen_grant_dma_alloc_pages,
+ .free_pages = xen_grant_dma_free_pages,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
+ .map_page = xen_grant_dma_map_page,
+ .unmap_page = xen_grant_dma_unmap_page,
+ .map_sg = xen_grant_dma_map_sg,
+ .unmap_sg = xen_grant_dma_unmap_sg,
+ .dma_supported = xen_grant_dma_supported,
+};
+
+bool xen_is_grant_dma_device(struct device *dev)
+{
+ struct device_node *iommu_np;
+ bool has_iommu;
+
+ /* XXX Handle only DT devices for now */
+ if (!dev->of_node)
+ return false;
+
+ iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+ has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma");
+ of_node_put(iommu_np);
+
+ return has_iommu;
+}
+
+void xen_grant_setup_dma_ops(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+ struct of_phandle_args iommu_spec;
+
+ data = find_xen_grant_dma_data(dev);
+ if (data) {
+ dev_err(dev, "Xen grant DMA data is already created\n");
+ return;
+ }
+
+ /* XXX ACPI device unsupported for now */
+ if (!dev->of_node)
+ goto err;
+
+ if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells",
+ 0, &iommu_spec)) {
+ dev_err(dev, "Cannot parse iommus property\n");
+ goto err;
+ }
+
+ if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
+ iommu_spec.args_count != 1) {
+ dev_err(dev, "Incompatible IOMMU node\n");
+ of_node_put(iommu_spec.np);
+ goto err;
+ }
+
+ of_node_put(iommu_spec.np);
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto err;
+
+ /*
+ * The endpoint ID here means the ID of the domain where the corresponding
+ * backend is running
+ */
+ data->backend_domid = iommu_spec.args[0];
+
+ if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
+ GFP_KERNEL))) {
+ dev_err(dev, "Cannot store Xen grant DMA data\n");
+ goto err;
+ }
+
+ dev->dma_ops = &xen_grant_dma_ops;
+
+ return;
+
+err:
+ dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n");
+}
+
+MODULE_DESCRIPTION("Xen grant DMA-mapping layer");
+MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
+MODULE_LICENSE("GPL");
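
Once xen_grant_setup_dma_ops() has installed xen_grant_dma_ops on a device, a
frontend driver needs no Xen-specific code: the generic DMA API routes into
the routines above. With 4 KiB pages, grant reference 0x1234 becomes DMA
address 0x8000000001234000 (bit 63 set, grant in the frame-number bits). A
hedged sketch (not part of the patch; the device and ring size are
hypothetical):

#include <linux/dma-mapping.h>

static int example_alloc_ring(struct device *dev, void **ring,
			      dma_addr_t *dma_handle)
{
	/*
	 * Routed to xen_grant_dma_alloc(): the pages are granted to the
	 * backend domain and *dma_handle carries the first grant reference
	 * shifted left by PAGE_SHIFT, with bit 63 set.
	 */
	*ring = dma_alloc_coherent(dev, 4 * PAGE_SIZE, dma_handle, GFP_KERNEL);
	return *ring ? 0 : -ENOMEM;
}

static void example_free_ring(struct device *dev, void *ring,
			      dma_addr_t dma_handle)
{
	/* Routed to xen_grant_dma_free(): ends the foreign access. */
	dma_free_coherent(dev, 4 * PAGE_SIZE, ring, dma_handle);
}
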
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7a18292540bc..738029de3c67 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -33,6 +33,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -70,9 +71,32 @@
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
+
+/*
+ * Handling of free grants:
+ *
+ * Free grants are in a simple list anchored in gnttab_free_head. They are
+ * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
+ * of free entries is stored in gnttab_free_count.
+ * Additionally there is a bitmap of free entries anchored in
+ * gnttab_free_bitmap. It is used to simplify allocation of multiple
+ * consecutive grants, which is needed e.g. to support virtio.
+ * gnttab_last_free is used to add free entries of new frames at the end of
+ * the free list.
+ * gnttab_free_tail_ptr specifies the variable which references the start
+ * of consecutive free grants ending with gnttab_last_free. This pointer is
+ * updated in a rather defensive way, in order to avoid performance hits in
+ * hot paths.
+ * All those variables are protected by gnttab_list_lock.
+ */
static int gnttab_free_count;
-static grant_ref_t gnttab_free_head;
+static unsigned int gnttab_size;
+static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
+static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
+static grant_ref_t *gnttab_free_tail_ptr;
+static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock);
+
struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);
@@ -168,16 +192,116 @@ static int get_free_entries(unsigned count)
ref = head = gnttab_free_head;
gnttab_free_count -= count;
- while (count-- > 1)
- head = gnttab_entry(head);
+ while (count--) {
+ bitmap_clear(gnttab_free_bitmap, head, 1);
+ if (gnttab_free_tail_ptr == __gnttab_entry(head))
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ if (count)
+ head = gnttab_entry(head);
+ }
gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END;
+ if (!gnttab_free_count) {
+ gnttab_last_free = GNTTAB_LIST_END;
+ gnttab_free_tail_ptr = NULL;
+ }
+
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ref;
}
+static int get_seq_entry_count(void)
+{
+ if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
+ *gnttab_free_tail_ptr == GNTTAB_LIST_END)
+ return 0;
+
+ return gnttab_last_free - *gnttab_free_tail_ptr + 1;
+}
+
+/* Rebuilds the free grant list and tries to find count consecutive entries. */
+static int get_free_seq(unsigned int count)
+{
+ int ret = -ENOSPC;
+ unsigned int from, to;
+ grant_ref_t *last;
+
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ last = &gnttab_free_head;
+
+ for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
+ from < gnttab_size;
+ from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
+ to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
+ from + 1);
+ if (ret < 0 && to - from >= count) {
+ ret = from;
+ bitmap_clear(gnttab_free_bitmap, ret, count);
+ from += count;
+ gnttab_free_count -= count;
+ if (from == to)
+ continue;
+ }
+
+ /*
+ * Recreate the free list in order to have it properly sorted.
+ * This is needed to make sure that the free tail has the maximum
+ * possible size.
+ */
+ while (from < to) {
+ *last = from;
+ last = __gnttab_entry(from);
+ gnttab_last_free = from;
+ from++;
+ }
+ if (to < gnttab_size)
+ gnttab_free_tail_ptr = __gnttab_entry(to - 1);
+ }
+
+ *last = GNTTAB_LIST_END;
+ if (gnttab_last_free != gnttab_size - 1)
+ gnttab_free_tail_ptr = NULL;
+
+ return ret;
+}
+
+static int get_free_entries_seq(unsigned int count)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+
+ if (gnttab_free_count < count) {
+ ret = gnttab_expand(count - gnttab_free_count);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (get_seq_entry_count() < count) {
+ ret = get_free_seq(count);
+ if (ret >= 0)
+ goto out;
+ ret = gnttab_expand(count - get_seq_entry_count());
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = *gnttab_free_tail_ptr;
+ *gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
+ gnttab_free_count -= count;
+ if (!gnttab_free_count)
+ gnttab_free_tail_ptr = NULL;
+ bitmap_clear(gnttab_free_bitmap, ret, count);
+
+ out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+ return ret;
+}
+
static void do_free_callbacks(void)
{
struct gnttab_free_callback *callback, *next;
@@ -204,21 +328,51 @@ static inline void check_free_callbacks(void)
do_free_callbacks();
}
-static void put_free_entry(grant_ref_t ref)
+static void put_free_entry_locked(grant_ref_t ref)
{
- unsigned long flags;
-
if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
return;
- spin_lock_irqsave(&gnttab_list_lock, flags);
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = ref;
+ if (!gnttab_free_count)
+ gnttab_last_free = ref;
+ if (gnttab_free_tail_ptr == &gnttab_free_head)
+ gnttab_free_tail_ptr = __gnttab_entry(ref);
gnttab_free_count++;
+ bitmap_set(gnttab_free_bitmap, ref, 1);
+}
+
+static void put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ put_free_entry_locked(ref);
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
+static void gnttab_set_free(unsigned int start, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = start; i < start + n - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(i) = GNTTAB_LIST_END;
+ if (!gnttab_free_count) {
+ gnttab_free_head = start;
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ } else {
+ gnttab_entry(gnttab_last_free) = start;
+ }
+ gnttab_free_count += n;
+ gnttab_last_free = i;
+
+ bitmap_set(gnttab_free_bitmap, start, n);
+}
+
/*
* Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
* Introducing a valid entry into the grant table:
@@ -450,23 +604,31 @@ void gnttab_free_grant_references(grant_ref_t head)
{
grant_ref_t ref;
unsigned long flags;
- int count = 1;
- if (head == GNTTAB_LIST_END)
- return;
+
spin_lock_irqsave(&gnttab_list_lock, flags);
- ref = head;
- while (gnttab_entry(ref) != GNTTAB_LIST_END) {
- ref = gnttab_entry(ref);
- count++;
+ while (head != GNTTAB_LIST_END) {
+ ref = gnttab_entry(head);
+ put_free_entry_locked(head);
+ head = ref;
}
- gnttab_entry(ref) = gnttab_free_head;
- gnttab_free_head = head;
- gnttab_free_count += count;
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
+void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
+{
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (i = count; i > 0; i--)
+ put_free_entry_locked(head + i - 1);
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
+
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
int h = get_free_entries(count);
@@ -480,6 +642,24 @@ int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
+int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
+{
+ int h;
+
+ if (count == 1)
+ h = get_free_entries(1);
+ else
+ h = get_free_entries_seq(count);
+
+ if (h < 0)
+ return -ENOSPC;
+
+ *first = h;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
+
int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
return (*private_head == GNTTAB_LIST_END);
@@ -572,16 +752,13 @@ static int grow_gnttab_list(unsigned int more_frames)
goto grow_nomem;
}
+ gnttab_set_free(gnttab_size, extra_entries);
- for (i = grefs_per_frame * nr_grant_frames;
- i < grefs_per_frame * new_nr_grant_frames - 1; i++)
- gnttab_entry(i) = i + 1;
-
- gnttab_entry(i) = gnttab_free_head;
- gnttab_free_head = grefs_per_frame * nr_grant_frames;
- gnttab_free_count += extra_entries;
+ if (!gnttab_free_tail_ptr)
+ gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
nr_grant_frames = new_nr_grant_frames;
+ gnttab_size += extra_entries;
check_free_callbacks();
@@ -1424,20 +1601,20 @@ static int gnttab_expand(unsigned int req_entries)
int gnttab_init(void)
{
int i;
- unsigned long max_nr_grant_frames;
+ unsigned long max_nr_grant_frames, max_nr_grefs;
unsigned int max_nr_glist_frames, nr_glist_frames;
- unsigned int nr_init_grefs;
int ret;
gnttab_request_version();
max_nr_grant_frames = gnttab_max_grant_frames();
+ max_nr_grefs = max_nr_grant_frames *
+ gnttab_interface->grefs_per_grant_frame;
nr_grant_frames = 1;
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- max_nr_glist_frames = (max_nr_grant_frames *
- gnttab_interface->grefs_per_grant_frame / RPP);
+ max_nr_glist_frames = max_nr_grefs / RPP;
gnttab_list = kmalloc_array(max_nr_glist_frames,
sizeof(grant_ref_t *),
@@ -1454,6 +1631,12 @@ int gnttab_init(void)
}
}
+ gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
+ if (!gnttab_free_bitmap) {
+ ret = -ENOMEM;
+ goto ini_nomem;
+ }
+
ret = arch_gnttab_init(max_nr_grant_frames,
nr_status_frames(max_nr_grant_frames));
if (ret < 0)
@@ -1464,15 +1647,10 @@ int gnttab_init(void)
goto ini_nomem;
}
- nr_init_grefs = nr_grant_frames *
- gnttab_interface->grefs_per_grant_frame;
-
- for (i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
- gnttab_entry(i) = i + 1;
+ gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
- gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
- gnttab_free_count = nr_init_grefs - GNTTAB_NR_RESERVED_ENTRIES;
- gnttab_free_head = GNTTAB_NR_RESERVED_ENTRIES;
+ gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
+ gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);
printk("Grant table initialized\n");
return 0;
@@ -1481,6 +1659,7 @@ int gnttab_init(void)
for (i--; i >= 0; i--)
free_page((unsigned long)gnttab_list[i]);
kfree(gnttab_list);
+ bitmap_free(gnttab_free_bitmap);
return ret;
}
EXPORT_SYMBOL_GPL(gnttab_init);
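
The new gnttab_alloc_grant_reference_seq()/gnttab_free_grant_reference_seq()
exports above hand callers a consecutive run of grant references, which is
what grant-dma-ops.c relies on. A hedged usage sketch (not part of the patch;
backend_domid and pages[] are hypothetical inputs):

#include <xen/grant_table.h>
#include <xen/page.h>

static int example_grant_range(domid_t backend_domid, struct page **pages,
			       unsigned int n_pages)
{
	grant_ref_t grant;
	unsigned int i;

	if (gnttab_alloc_grant_reference_seq(n_pages, &grant))
		return -ENOSPC;

	for (i = 0; i < n_pages; i++)
		gnttab_grant_foreign_access_ref(grant + i, backend_domid,
						xen_page_to_gfn(pages[i]), 0);

	/* ... hand "grant" to the backend; later, once it is done ... */

	for (i = 0; i < n_pages; i++)
		if (!gnttab_end_foreign_access_ref(grant + i))
			return -EBUSY;	/* a page is still mapped by the backend */

	gnttab_free_grant_reference_seq(grant, n_pages);
	return 0;
}
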
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 34742c6e189e..f17c4c03db30 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -261,7 +261,6 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
return 0;
}
-EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages);
struct remap_pfn {
struct mm_struct *mm;