diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-13 00:39:38 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-13 00:39:38 +0300 |
commit | 778ce723e93ee803ef5883619fe2391e00dbc209 (patch) | |
tree | 861f03223f34780fecce597b45588118236874d7 | |
parent | 1440f576022887004f719883acb094e7e0dd4944 (diff) | |
parent | 7880672bdc975daa586e8256714d9906d30c615e (diff) | |
download | linux-778ce723e93ee803ef5883619fe2391e00dbc209.tar.xz |
Merge tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- Some minor typo fixes
- A fix of the Xen pcifront driver for supporting the device model to
run in a Linux stub domain
- A cleanup of the pcifront driver
- A series to enable grant-based virtio with Xen on x86
- A cleanup of Xen PV guests to distinguish between safe and faulting
MSR accesses
- Two fixes of the Xen gntdev driver
- Two fixes of the new xen grant DMA driver
* tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen: Kconfig: Fix spelling mistake "Maxmium" -> "Maximum"
xen/pv: support selecting safe/unsafe msr accesses
xen/pv: refactor msr access functions to support safe and unsafe accesses
xen/pv: fix vendor checks for pmu emulation
xen/pv: add fault recovery control to pmu msr accesses
xen/virtio: enable grant based virtio on x86
xen/virtio: use dom0 as default backend for CONFIG_XEN_VIRTIO_FORCE_GRANT
xen/virtio: restructure xen grant dma setup
xen/pcifront: move xenstore config scanning into sub-function
xen/gntdev: Accommodate VMA splitting
xen/gntdev: Prevent leaking grants
xen/virtio: Fix potential deadlock when accessing xen_grant_dma_devices
xen/virtio: Fix n_pages calculation in xen_grant_dma_map(unmap)_page()
xen/xenbus: Fix spelling mistake "hardward" -> "hardware"
xen-pcifront: Handle missed Connected state
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 6 | ||||
-rw-r--r-- | arch/x86/xen/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_hvm.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 101 | ||||
-rw-r--r-- | arch/x86/xen/pmu.c | 71 | ||||
-rw-r--r-- | drivers/pci/xen-pcifront.c | 161 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 2 | ||||
-rw-r--r-- | drivers/xen/gntdev-common.h | 3 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 80 | ||||
-rw-r--r-- | drivers/xen/grant-dma-ops.c | 112 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/xenbus.c | 2 | ||||
-rw-r--r-- | include/xen/xen-ops.h | 6 |
12 files changed, 313 insertions, 242 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 69b1533c1f02..a465d5242774 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6851,6 +6851,12 @@ Crash from Xen panic notifier, without executing late panic() code such as dumping handler. + xen_msr_safe= [X86,XEN] + Format: <bool> + Select whether to always use non-faulting (safe) MSR + access functions when running as Xen PV guest. The + default value is controlled by CONFIG_XEN_PV_MSR_SAFE. + xen_nopvspin [X86,XEN] Disables the qspinlock slowpath using Xen PV optimizations. This parameter is obsoleted by "nopvspin" parameter, which diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 85246dd9faa1..9b1ec5d8c99c 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -92,3 +92,12 @@ config XEN_DOM0 select X86_X2APIC if XEN_PVH && X86_64 help Support running as a Xen Dom0 guest. + +config XEN_PV_MSR_SAFE + bool "Always use safe MSR accesses in PV guests" + default y + depends on XEN_PV + help + Use safe (not faulting) MSR access functions even if the MSR access + should not fault anyway. + The default can be changed by using the "xen_msr_safe" boot parameter. diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 1c1ac418484b..c1cd28e915a3 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -212,7 +212,7 @@ static void __init xen_hvm_guest_init(void) return; if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); init_hvm_pv_info(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9b1a58dda935..f82857e48815 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -108,11 +108,21 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); +static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); + +static int __init parse_xen_msr_safe(char *str) +{ + if (str) + return strtobool(str, &xen_msr_safe); + return -EINVAL; +} +early_param("xen_msr_safe", parse_xen_msr_safe); + static void __init xen_pv_init_platform(void) { /* PV guests can't operate virtio devices without grants. */ if (IS_ENABLED(CONFIG_XEN_VIRTIO)) - virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); + virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); @@ -917,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static u64 xen_read_msr_safe(unsigned int msr, int *err) +static u64 xen_do_read_msr(unsigned int msr, int *err) { - u64 val; + u64 val = 0; /* Avoid uninitialized value for safe variant. */ if (pmu_msr_read(msr, &val, err)) return val; - val = native_read_msr_safe(msr, err); + if (err) + val = native_read_msr_safe(msr, err); + else + val = native_read_msr(msr); + switch (msr) { case MSR_IA32_APICBASE: val &= ~X2APIC_ENABLE; @@ -933,23 +947,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) return val; } -static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +static void set_seg(unsigned int which, unsigned int low, unsigned int high, + int *err) { - int ret; - unsigned int which; - u64 base; + u64 base = ((u64)high << 32) | low; - ret = 0; + if (HYPERVISOR_set_segment_base(which, base) == 0) + return; + if (err) + *err = -EIO; + else + WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base); +} + +/* + * Support write_msr_safe() and write_msr() semantics. + * With err == NULL write_msr() semantics are selected. + * Supplying an err pointer requires err to be pre-initialized with 0. + */ +static void xen_do_write_msr(unsigned int msr, unsigned int low, + unsigned int high, int *err) +{ switch (msr) { - case MSR_FS_BASE: which = SEGBASE_FS; goto set; - case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; - case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; - - set: - base = ((u64)high << 32) | low; - if (HYPERVISOR_set_segment_base(which, base) != 0) - ret = -EIO; + case MSR_FS_BASE: + set_seg(SEGBASE_FS, low, high, err); + break; + + case MSR_KERNEL_GS_BASE: + set_seg(SEGBASE_GS_USER, low, high, err); + break; + + case MSR_GS_BASE: + set_seg(SEGBASE_GS_KERNEL, low, high, err); break; case MSR_STAR: @@ -965,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) break; default: - if (!pmu_msr_write(msr, low, high, &ret)) - ret = native_write_msr_safe(msr, low, high); + if (!pmu_msr_write(msr, low, high, err)) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); + } } +} - return ret; +static u64 xen_read_msr_safe(unsigned int msr, int *err) +{ + return xen_do_read_msr(msr, err); +} + +static int xen_write_msr_safe(unsigned int msr, unsigned int low, + unsigned int high) +{ + int err = 0; + + xen_do_write_msr(msr, low, high, &err); + + return err; } static u64 xen_read_msr(unsigned int msr) { - /* - * This will silently swallow a #GP from RDMSR. It may be worth - * changing that. - */ int err; - return xen_read_msr_safe(msr, &err); + return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL); } static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) { - /* - * This will silently swallow a #GP from WRMSR. It may be worth - * changing that. - */ - xen_write_msr_safe(msr, low, high); + int err; + + xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL); } /* This is called once we have the cpu_possible_mask */ diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 21ecbe754cb2..68aff1382872 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -131,6 +131,10 @@ static inline uint32_t get_fam15h_addr(u32 addr) static inline bool is_amd_pmu_msr(unsigned int msr) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return false; + if ((msr >= MSR_F15H_PERF_CTL && msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) || (msr >= MSR_K7_EVNTSEL0 && @@ -140,10 +144,15 @@ static inline bool is_amd_pmu_msr(unsigned int msr) return false; } -static int is_intel_pmu_msr(u32 msr_index, int *type, int *index) +static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index) { u32 msr_index_pmc; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) + return false; + switch (msr_index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: case MSR_IA32_DS_AREA: @@ -290,48 +299,52 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) return false; } +static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, + bool *emul) +{ + int type, index; + + if (is_amd_pmu_msr(msr)) + *emul = xen_amd_pmu_emulate(msr, val, is_read); + else if (is_intel_pmu_msr(msr, &type, &index)) + *emul = xen_intel_pmu_emulate(msr, val, type, index, is_read); + else + return false; + + return true; +} + bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, val, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } - } else { - int type, index; + bool emulated; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, val, type, index, 1)) - *val = native_read_msr_safe(msr, err); - return true; - } + if (!pmu_msr_chk_emulated(msr, val, true, &emulated)) + return false; + + if (!emulated) { + *val = err ? native_read_msr_safe(msr, err) + : native_read_msr(msr); } - return false; + return true; } bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err) { uint64_t val = ((uint64_t)high << 32) | low; + bool emulated; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { - if (is_amd_pmu_msr(msr)) { - if (!xen_amd_pmu_emulate(msr, &val, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } - } else { - int type, index; + if (!pmu_msr_chk_emulated(msr, &val, false, &emulated)) + return false; - if (is_intel_pmu_msr(msr, &type, &index)) { - if (!xen_intel_pmu_emulate(msr, &val, type, index, 0)) - *err = native_write_msr_safe(msr, low, high); - return true; - } + if (!emulated) { + if (err) + *err = native_write_msr_safe(msr, low, high); + else + native_write_msr(msr, low, high); } - return false; + return true; } static unsigned long long xen_amd_read_pmc(int counter) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 689271c4245c..7378e2f3e525 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -521,24 +521,14 @@ static int pcifront_rescan_root(struct pcifront_device *pdev, int err; struct pci_bus *b; -#ifndef CONFIG_PCI_DOMAINS - if (domain != 0) { - dev_err(&pdev->xdev->dev, - "PCI Root in non-zero PCI Domain! domain=%d\n", domain); - dev_err(&pdev->xdev->dev, - "Please compile with CONFIG_PCI_DOMAINS\n"); - return -EINVAL; - } -#endif - - dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", - domain, bus); - b = pci_find_bus(domain, bus); if (!b) /* If the bus is unknown, create it. */ return pcifront_scan_root(pdev, domain, bus); + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", + domain, bus); + err = pcifront_scan_bus(pdev, domain, bus, b); /* Claim resources before going "live" with our devices */ @@ -819,76 +809,73 @@ out: return err; } -static int pcifront_try_connect(struct pcifront_device *pdev) +static void pcifront_connect(struct pcifront_device *pdev) { - int err = -EFAULT; + int err; int i, num_roots, len; char str[64]; unsigned int domain, bus; - - /* Only connect once */ - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateInitialised) - goto out; - - err = pcifront_connect_and_init_dma(pdev); - if (err && err != -EEXIST) { - xenbus_dev_fatal(pdev->xdev, err, - "Error setting up PCI Frontend"); - goto out; - } - err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "root_num", "%d", &num_roots); if (err == -ENOENT) { xenbus_dev_error(pdev->xdev, err, "No PCI Roots found, trying 0000:00"); - err = pcifront_scan_root(pdev, 0, 0); + err = pcifront_rescan_root(pdev, 0, 0); if (err) { xenbus_dev_fatal(pdev->xdev, err, "Error scanning PCI root 0000:00"); - goto out; + return; } num_roots = 0; } else if (err != 1) { - if (err == 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, + xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err, "Error reading number of PCI roots"); - goto out; + return; } for (i = 0; i < num_roots; i++) { len = snprintf(str, sizeof(str), "root-%d", i); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } + if (unlikely(len >= (sizeof(str) - 1))) + return; err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%x:%x", &domain, &bus); if (err != 2) { - if (err >= 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, + xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err, "Error reading PCI root %d", i); - goto out; + return; } - err = pcifront_scan_root(pdev, domain, bus); + err = pcifront_rescan_root(pdev, domain, bus); if (err) { xenbus_dev_fatal(pdev->xdev, err, "Error scanning PCI root %04x:%02x", domain, bus); - goto out; + return; } } - err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + xenbus_switch_state(pdev->xdev, XenbusStateConnected); +} -out: - return err; +static void pcifront_try_connect(struct pcifront_device *pdev) +{ + int err; + + /* Only connect once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + return; + + err = pcifront_connect_and_init_dma(pdev); + if (err && err != -EEXIST) { + xenbus_dev_fatal(pdev->xdev, err, + "Error setting up PCI Frontend"); + return; + } + + pcifront_connect(pdev); } static int pcifront_try_disconnect(struct pcifront_device *pdev) @@ -914,80 +901,37 @@ out: return err; } -static int pcifront_attach_devices(struct pcifront_device *pdev) +static void pcifront_attach_devices(struct pcifront_device *pdev) { - int err = -EFAULT; - int i, num_roots, len; - unsigned int domain, bus; - char str[64]; - - if (xenbus_read_driver_state(pdev->xdev->nodename) != + if (xenbus_read_driver_state(pdev->xdev->nodename) == XenbusStateReconfiguring) - goto out; - - err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, - "root_num", "%d", &num_roots); - if (err == -ENOENT) { - xenbus_dev_error(pdev->xdev, err, - "No PCI Roots found, trying 0000:00"); - err = pcifront_rescan_root(pdev, 0, 0); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error scanning PCI root 0000:00"); - goto out; - } - num_roots = 0; - } else if (err != 1) { - if (err == 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error reading number of PCI roots"); - goto out; - } - - for (i = 0; i < num_roots; i++) { - len = snprintf(str, sizeof(str), "root-%d", i); - if (unlikely(len >= (sizeof(str) - 1))) { - err = -ENOMEM; - goto out; - } - - err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, - "%x:%x", &domain, &bus); - if (err != 2) { - if (err >= 0) - err = -EINVAL; - xenbus_dev_fatal(pdev->xdev, err, - "Error reading PCI root %d", i); - goto out; - } - - err = pcifront_rescan_root(pdev, domain, bus); - if (err) { - xenbus_dev_fatal(pdev->xdev, err, - "Error scanning PCI root %04x:%02x", - domain, bus); - goto out; - } - } - - xenbus_switch_state(pdev->xdev, XenbusStateConnected); - -out: - return err; + pcifront_connect(pdev); } static int pcifront_detach_devices(struct pcifront_device *pdev) { int err = 0; int i, num_devs; + enum xenbus_state state; unsigned int domain, bus, slot, func; struct pci_dev *pci_dev; char str[64]; - if (xenbus_read_driver_state(pdev->xdev->nodename) != - XenbusStateConnected) + state = xenbus_read_driver_state(pdev->xdev->nodename); + if (state == XenbusStateInitialised) { + dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n"); + /* We missed Connected and need to initialize. */ + err = pcifront_connect_and_init_dma(pdev); + if (err && err != -EEXIST) { + xenbus_dev_fatal(pdev->xdev, err, + "Error setting up PCI Frontend"); + goto out; + } + + goto out_switch_state; + } else if (state != XenbusStateConnected) { goto out; + } err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", &num_devs); @@ -1048,6 +992,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev) domain, bus, slot, func); } + out_switch_state: err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); out: diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a65bd92121a5..d5d7c402b651 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -56,7 +56,7 @@ config XEN_MEMORY_HOTPLUG_LIMIT depends on XEN_HAVE_PVMMU depends on MEMORY_HOTPLUG help - Maxmium amount of memory (in GiB) that a PV guest can be + Maximum amount of memory (in GiB) that a PV guest can be expanded to when using memory hotplug. A PV guest can have more memory than this limit if is diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 40ef379c28ab..9c286b2a1900 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -44,9 +44,10 @@ struct gntdev_unmap_notify { }; struct gntdev_grant_map { + atomic_t in_use; struct mmu_interval_notifier notifier; + bool notifier_init; struct list_head next; - struct vm_area_struct *vma; int index; int count; int flags; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 84b143eef395..4d9a3050de6a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) */ } + if (use_ptemod && map->notifier_init) + mmu_interval_notifier_remove(&map->notifier); + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) { struct gntdev_grant_map *map = data; - unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; + unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT; int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte | (1 << _GNTMAP_guest_avail0); u64 pte_maddr; @@ -367,8 +370,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) for (i = 0; i < map->count; i++) { if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - if (!use_ptemod) - alloced++; + alloced++; } else if (!err) err = -EINVAL; @@ -377,8 +379,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) if (use_ptemod) { if (map->kmap_ops[i].status == GNTST_okay) { - if (map->map_ops[i].status == GNTST_okay) - alloced++; + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; } else if (!err) err = -EINVAL; @@ -394,8 +395,14 @@ static void __unmap_grant_pages_done(int result, unsigned int i; struct gntdev_grant_map *map = data->data; unsigned int offset = data->unmap_ops - map->unmap_ops; + int successful_unmaps = 0; + int live_grants; for (i = 0; i < data->count; i++) { + if (map->unmap_ops[offset + i].status == GNTST_okay && + map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE) + successful_unmaps++; + WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay && map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("unmap handle=%d st=%d\n", @@ -403,6 +410,10 @@ static void __unmap_grant_pages_done(int result, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { + if (map->kunmap_ops[offset + i].status == GNTST_okay && + map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE) + successful_unmaps++; + WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay && map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE); pr_debug("kunmap handle=%u st=%d\n", @@ -411,11 +422,15 @@ static void __unmap_grant_pages_done(int result, map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } } + /* * Decrease the live-grant counter. This must happen after the loop to * prevent premature reuse of the grants by gnttab_mmap(). */ - atomic_sub(data->count, &map->live_grants); + live_grants = atomic_sub_return(successful_unmaps, &map->live_grants); + if (WARN_ON(live_grants < 0)) + pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n", + __func__, live_grants, successful_unmaps); /* Release reference taken by __unmap_grant_pages */ gntdev_put_map(NULL, map); @@ -496,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma) struct gntdev_priv *priv = file->private_data; pr_debug("gntdev_vma_close %p\n", vma); - if (use_ptemod) { - WARN_ON(map->vma != vma); - mmu_interval_notifier_remove(&map->notifier); - map->vma = NULL; - } + vma->vm_private_data = NULL; gntdev_put_map(priv, map); } @@ -528,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; + unsigned long map_start, map_end; if (!mmu_notifier_range_blockable(range)) return false; + map_start = map->pages_vm_start; + map_end = map->pages_vm_start + (map->count << PAGE_SHIFT); + /* * If the VMA is split or otherwise changed the notifier is not * updated, but we don't want to process VA's outside the modified * VMA. FIXME: It would be much more understandable to just prevent * modifying the VMA in the first place. */ - if (map->vma->vm_start >= range->end || - map->vma->vm_end <= range->start) + if (map_start >= range->end || map_end <= range->start) return true; - mstart = max(range->start, map->vma->vm_start); - mend = min(range->end, map->vma->vm_end); + mstart = max(range->start, map_start); + mend = min(range->end, map_end); pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", - map->index, map->count, - map->vma->vm_start, map->vma->vm_end, - range->start, range->end, mstart, mend); - unmap_grant_pages(map, - (mstart - map->vma->vm_start) >> PAGE_SHIFT, - (mend - mstart) >> PAGE_SHIFT); + map->index, map->count, map_start, map_end, + range->start, range->end, mstart, mend); + unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT, + (mend - mstart) >> PAGE_SHIFT); return true; } @@ -1030,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return -EINVAL; pr_debug("map %d+%d at %lx (pgoff %lx)\n", - index, count, vma->vm_start, vma->vm_pgoff); + index, count, vma->vm_start, vma->vm_pgoff); mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; - if (use_ptemod && map->vma) + if (!atomic_add_unless(&map->in_use, 1, 1)) goto unlock_out; - if (atomic_read(&map->live_grants)) { - err = -EAGAIN; - goto unlock_out; - } + refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; @@ -1062,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } + map->pages_vm_start = vma->vm_start; + if (use_ptemod) { - map->vma = vma; err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); - if (err) { - map->vma = NULL; + if (err) goto out_unlock_put; - } + + map->notifier_init = true; } mutex_unlock(&priv->lock); @@ -1087,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) */ mmu_interval_read_begin(&map->notifier); - map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, find_grant_ptes, map); @@ -1116,13 +1125,8 @@ unlock_out: out_unlock_put: mutex_unlock(&priv->lock); out_put_map: - if (use_ptemod) { + if (use_ptemod) unmap_grant_pages(map, 0, map->count); - if (map->vma) { - mmu_interval_notifier_remove(&map->notifier); - map->vma = NULL; - } - } gntdev_put_map(priv, map); return err; } diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index 8973fc1e9ccc..860f37c93af4 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -25,7 +25,7 @@ struct xen_grant_dma_data { bool broken; }; -static DEFINE_XARRAY(xen_grant_dma_devices); +static DEFINE_XARRAY_FLAGS(xen_grant_dma_devices, XA_FLAGS_LOCK_IRQ); #define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63) @@ -42,14 +42,29 @@ static inline grant_ref_t dma_to_grant(dma_addr_t dma) static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev) { struct xen_grant_dma_data *data; + unsigned long flags; - xa_lock(&xen_grant_dma_devices); + xa_lock_irqsave(&xen_grant_dma_devices, flags); data = xa_load(&xen_grant_dma_devices, (unsigned long)dev); - xa_unlock(&xen_grant_dma_devices); + xa_unlock_irqrestore(&xen_grant_dma_devices, flags); return data; } +static int store_xen_grant_dma_data(struct device *dev, + struct xen_grant_dma_data *data) +{ + unsigned long flags; + int ret; + + xa_lock_irqsave(&xen_grant_dma_devices, flags); + ret = xa_err(__xa_store(&xen_grant_dma_devices, (unsigned long)dev, data, + GFP_ATOMIC)); + xa_unlock_irqrestore(&xen_grant_dma_devices, flags); + + return ret; +} + /* * DMA ops for Xen frontends (e.g. virtio). * @@ -153,7 +168,7 @@ static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page, unsigned long attrs) { struct xen_grant_dma_data *data; - unsigned int i, n_pages = PFN_UP(size); + unsigned int i, n_pages = PFN_UP(offset + size); grant_ref_t grant; dma_addr_t dma_handle; @@ -185,7 +200,8 @@ static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, unsigned long attrs) { struct xen_grant_dma_data *data; - unsigned int i, n_pages = PFN_UP(size); + unsigned long offset = dma_handle & (PAGE_SIZE - 1); + unsigned int i, n_pages = PFN_UP(offset + size); grant_ref_t grant; if (WARN_ON(dir == DMA_NONE)) @@ -273,72 +289,91 @@ static const struct dma_map_ops xen_grant_dma_ops = { .dma_supported = xen_grant_dma_supported, }; -bool xen_is_grant_dma_device(struct device *dev) +static bool xen_is_dt_grant_dma_device(struct device *dev) { struct device_node *iommu_np; bool has_iommu; - /* XXX Handle only DT devices for now */ - if (!dev->of_node) - return false; - iommu_np = of_parse_phandle(dev->of_node, "iommus", 0); - has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma"); + has_iommu = iommu_np && + of_device_is_compatible(iommu_np, "xen,grant-dma"); of_node_put(iommu_np); return has_iommu; } +bool xen_is_grant_dma_device(struct device *dev) +{ + /* XXX Handle only DT devices for now */ + if (dev->of_node) + return xen_is_dt_grant_dma_device(dev); + + return false; +} + bool xen_virtio_mem_acc(struct virtio_device *dev) { - if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) + if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain()) return true; return xen_is_grant_dma_device(dev->dev.parent); } -void xen_grant_setup_dma_ops(struct device *dev) +static int xen_dt_grant_init_backend_domid(struct device *dev, + struct xen_grant_dma_data *data) { - struct xen_grant_dma_data *data; struct of_phandle_args iommu_spec; - data = find_xen_grant_dma_data(dev); - if (data) { - dev_err(dev, "Xen grant DMA data is already created\n"); - return; - } - - /* XXX ACPI device unsupported for now */ - if (!dev->of_node) - goto err; - if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", 0, &iommu_spec)) { dev_err(dev, "Cannot parse iommus property\n"); - goto err; + return -ESRCH; } if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") || iommu_spec.args_count != 1) { dev_err(dev, "Incompatible IOMMU node\n"); of_node_put(iommu_spec.np); - goto err; + return -ESRCH; } of_node_put(iommu_spec.np); + /* + * The endpoint ID here means the ID of the domain where the + * corresponding backend is running + */ + data->backend_domid = iommu_spec.args[0]; + + return 0; +} + +void xen_grant_setup_dma_ops(struct device *dev) +{ + struct xen_grant_dma_data *data; + + data = find_xen_grant_dma_data(dev); + if (data) { + dev_err(dev, "Xen grant DMA data is already created\n"); + return; + } + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) goto err; - /* - * The endpoint ID here means the ID of the domain where the corresponding - * backend is running - */ - data->backend_domid = iommu_spec.args[0]; + if (dev->of_node) { + if (xen_dt_grant_init_backend_domid(dev, data)) + goto err; + } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) { + dev_info(dev, "Using dom0 as backend\n"); + data->backend_domid = 0; + } else { + /* XXX ACPI device unsupported for now */ + goto err; + } - if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data, - GFP_KERNEL))) { + if (store_xen_grant_dma_data(dev, data)) { dev_err(dev, "Cannot store Xen grant DMA data\n"); goto err; } @@ -348,9 +383,20 @@ void xen_grant_setup_dma_ops(struct device *dev) return; err: + devm_kfree(dev, data); dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n"); } +bool xen_virtio_restricted_mem_acc(struct virtio_device *dev) +{ + bool ret = xen_virtio_mem_acc(dev); + + if (ret) + xen_grant_setup_dma_ops(dev->dev.parent); + + return ret; +} + MODULE_DESCRIPTION("Xen grant DMA-mapping layer"); MODULE_AUTHOR("Juergen Gross <jgross@suse.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index bde63ef677b8..d171091eec12 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -31,7 +31,7 @@ MODULE_PARM_DESC(passthrough, " frontend (for example, a device at 06:01.b will still appear at\n"\ " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\ " exposed PCI devices to its driver domains. This may be required\n"\ - " for drivers which depend on finding their hardward in certain\n"\ + " for drivers which depend on finding their hardware in certain\n"\ " bus/slot locations."); static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index dae0f350c678..a34f4271a2e9 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -219,6 +219,7 @@ static inline void xen_preemptible_hcall_end(void) { } void xen_grant_setup_dma_ops(struct device *dev); bool xen_is_grant_dma_device(struct device *dev); bool xen_virtio_mem_acc(struct virtio_device *dev); +bool xen_virtio_restricted_mem_acc(struct virtio_device *dev); #else static inline void xen_grant_setup_dma_ops(struct device *dev) { @@ -234,6 +235,11 @@ static inline bool xen_virtio_mem_acc(struct virtio_device *dev) { return false; } + +static inline bool xen_virtio_restricted_mem_acc(struct virtio_device *dev) +{ + return false; +} #endif /* CONFIG_XEN_GRANT_DMA_OPS */ #endif /* INCLUDE_XEN_OPS_H */ |