diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 04:45:39 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 04:45:39 +0400 |
commit | cf39c8e5352b4fb9efedfe7e9acb566a85ed847c (patch) | |
tree | 58d9f4b8c2ac48134264f1480cfc35b36462c4f4 /drivers | |
parent | 3398d252a4da80c47fe9b802184fa0a792387732 (diff) | |
parent | 23b7eaf8220721892975610dd0ae5c846a34dcb4 (diff) | |
download | linux-cf39c8e5352b4fb9efedfe7e9acb566a85ed847c.tar.xz |
Merge tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk:
"A couple of features and a ton of bug-fixes. There is also some
maintership changes. Jeremy is enjoying the full-time work at the
startup and as much as he would love to help - he can't find the time.
I have a bunch of other things that I promised to work on - paravirt
diet, get SWIOTLB working everywhere, etc, but haven't been able to
find the time.
As such both David Vrabel and Boris Ostrovsky have graciously
volunteered to help with the maintership role. They will keep the lid
on regressions, bug-fixes, etc. I will be in the background to help -
but eventually there will be less of me doing the Xen GIT pulls and
more of them. Stefano is still doing the ARM/ARM64 and will continue
on doing so.
Features:
- Xen Trusted Platform Module (TPM) frontend driver - with the
backend in MiniOS.
- Scalability improvements in event channel.
- Two extra Xen co-maintainers (David, Boris) and one going away (Jeremy)
Bug-fixes:
- Make the 1:1 mapping work during early bootup on selective regions.
- Add scratch page to balloon driver to deal with unexpected code
still holding on stale pages.
- Allow NMIs on PV guests (64-bit only)
- Remove unnecessary TLB flush in M2P code.
- Fixes duplicate callbacks in Xen granttable code.
- Fixes in PRIVCMD_MMAPBATCH ioctls to allow retries
- Fix for events being lost due to rescheduling on different VCPUs.
- More documentation"
* tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (23 commits)
hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc
drivers/xen-tpmfront: Fix compile issue with missing option.
xen/balloon: don't set P2M entry for auto translated guest
xen/evtchn: double free on error
Xen: Fix retry calls into PRIVCMD_MMAPBATCH*.
xen/pvhvm: Initialize xen panic handler for PVHVM guests
xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
xen: fix ARM build after 6efa20e4
MAINTAINERS: Remove Jeremy from the Xen subsystem.
xen/events: document behaviour when scanning the start word for events
x86/xen: during early setup, only 1:1 map the ISA region
x86/xen: disable premption when enabling local irqs
swiotlb-xen: replace dma_length with sg_dma_len() macro
swiotlb: replace dma_length with sg_dma_len() macro
xen/balloon: set a mapping for ballooned out pages
xen/evtchn: improve scalability by using per-user locks
xen/p2m: avoid unneccesary TLB flush in m2p_remove_override()
MAINTAINERS: Add in two extra co-maintainers of the Xen tree.
MAINTAINERS: Update the Xen subsystem's with proper mailing list.
xen: replace strict_strtoul() with kstrtoul()
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/char/tpm/Kconfig | 12 | ||||
-rw-r--r-- | drivers/char/tpm/Makefile | 1 | ||||
-rw-r--r-- | drivers/char/tpm/xen-tpmfront.c | 473 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 74 | ||||
-rw-r--r-- | drivers/xen/events.c | 30 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 191 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 11 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 13 | ||||
-rw-r--r-- | drivers/xen/privcmd.c | 83 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 8 | ||||
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 54 |
11 files changed, 808 insertions, 142 deletions
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index dbfd56446c31..94c0c74434ea 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -91,4 +91,16 @@ config TCG_ST33_I2C To compile this driver as a module, choose M here; the module will be called tpm_stm_st33_i2c. +config TCG_XEN + tristate "XEN TPM Interface" + depends on TCG_TPM && XEN + select XEN_XENBUS_FRONTEND + ---help--- + If you want to make TPM support available to a Xen user domain, + say Yes and it will be accessible from within Linux. See + the manpages for xl, xl.conf, and docs/misc/vtpm.txt in + the Xen source repository for more details. + To compile this driver as a module, choose M here; the module + will be called xen-tpmfront. + endif # TCG_TPM diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index a3736c97c65a..eb41ff97d0ad 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o +obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c new file mode 100644 index 000000000000..7a7929ba2658 --- /dev/null +++ b/drivers/char/tpm/xen-tpmfront.c @@ -0,0 +1,473 @@ +/* + * Implementation of the Xen vTPM device frontend + * + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <xen/events.h> +#include <xen/interface/io/tpmif.h> +#include <xen/grant_table.h> +#include <xen/xenbus.h> +#include <xen/page.h> +#include "tpm.h" + +struct tpm_private { + struct tpm_chip *chip; + struct xenbus_device *dev; + + struct vtpm_shared_page *shr; + + unsigned int evtchn; + int ring_ref; + domid_t backend_id; +}; + +enum status_bits { + VTPM_STATUS_RUNNING = 0x1, + VTPM_STATUS_IDLE = 0x2, + VTPM_STATUS_RESULT = 0x4, + VTPM_STATUS_CANCELED = 0x8, +}; + +static u8 vtpm_status(struct tpm_chip *chip) +{ + struct tpm_private *priv = TPM_VPRIV(chip); + switch (priv->shr->state) { + case VTPM_STATE_IDLE: + return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED; + case VTPM_STATE_FINISH: + return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT; + case VTPM_STATE_SUBMIT: + case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */ + return VTPM_STATUS_RUNNING; + default: + return 0; + } +} + +static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status) +{ + return status & VTPM_STATUS_CANCELED; +} + +static void vtpm_cancel(struct tpm_chip *chip) +{ + struct tpm_private *priv = TPM_VPRIV(chip); + priv->shr->state = VTPM_STATE_CANCEL; + wmb(); + notify_remote_via_evtchn(priv->evtchn); +} + +static unsigned int shr_data_offset(struct vtpm_shared_page *shr) +{ + return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages; +} + +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct tpm_private *priv = TPM_VPRIV(chip); + struct vtpm_shared_page *shr = priv->shr; + unsigned int offset = shr_data_offset(shr); + + u32 ordinal; + unsigned long duration; + + if (offset > PAGE_SIZE) + return -EINVAL; + + if (offset + count > PAGE_SIZE) + return -EINVAL; + + /* Wait for completion of any existing command or cancellation */ + if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c, + &chip->vendor.read_queue, true) < 0) { + vtpm_cancel(chip); + return -ETIME; + } + + memcpy(offset + (u8 *)shr, buf, count); + shr->length = count; + barrier(); + shr->state = VTPM_STATE_SUBMIT; + wmb(); + notify_remote_via_evtchn(priv->evtchn); + + ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal); + duration = tpm_calc_ordinal_duration(chip, ordinal); + + if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration, + &chip->vendor.read_queue, true) < 0) { + /* got a signal or timeout, try to cancel */ + vtpm_cancel(chip); + return -ETIME; + } + + return count; +} + +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct tpm_private *priv = TPM_VPRIV(chip); + struct vtpm_shared_page *shr = priv->shr; + unsigned int offset = shr_data_offset(shr); + size_t length = shr->length; + + if (shr->state == VTPM_STATE_IDLE) + return -ECANCELED; + + /* In theory the wait at the end of _send makes this one unnecessary */ + if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c, + &chip->vendor.read_queue, true) < 0) { + vtpm_cancel(chip); + return -ETIME; + } + + if (offset > PAGE_SIZE) + return -EIO; + + if (offset + length > PAGE_SIZE) + length = PAGE_SIZE - offset; + + if (length > count) + length = count; + + memcpy(buf, offset + (u8 *)shr, length); + + return length; +} + +ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct tpm_private *priv = TPM_VPRIV(chip); + u8 locality = priv->shr->locality; + + return sprintf(buf, "%d\n", locality); +} + +ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + struct tpm_private *priv = TPM_VPRIV(chip); + u8 val; + + int rv = kstrtou8(buf, 0, &val); + if (rv) + return rv; + + priv->shr->locality = val; + + return len; +} + +static const struct file_operations vtpm_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL); +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL); +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL); +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL); +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL); +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, + NULL); +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL); +static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); +static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality, + tpm_store_locality); + +static struct attribute *vtpm_attrs[] = { + &dev_attr_pubek.attr, + &dev_attr_pcrs.attr, + &dev_attr_enabled.attr, + &dev_attr_active.attr, + &dev_attr_owned.attr, + &dev_attr_temp_deactivated.attr, + &dev_attr_caps.attr, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, + &dev_attr_locality.attr, + NULL, +}; + +static struct attribute_group vtpm_attr_grp = { + .attrs = vtpm_attrs, +}; + +#define TPM_LONG_TIMEOUT (10 * 60 * HZ) + +static const struct tpm_vendor_specific tpm_vtpm = { + .status = vtpm_status, + .recv = vtpm_recv, + .send = vtpm_send, + .cancel = vtpm_cancel, + .req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT, + .req_complete_val = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT, + .req_canceled = vtpm_req_canceled, + .attr_group = &vtpm_attr_grp, + .miscdev = { + .fops = &vtpm_ops, + }, + .duration = { + TPM_LONG_TIMEOUT, + TPM_LONG_TIMEOUT, + TPM_LONG_TIMEOUT, + }, +}; + +static irqreturn_t tpmif_interrupt(int dummy, void *dev_id) +{ + struct tpm_private *priv = dev_id; + + switch (priv->shr->state) { + case VTPM_STATE_IDLE: + case VTPM_STATE_FINISH: + wake_up_interruptible(&priv->chip->vendor.read_queue); + break; + case VTPM_STATE_SUBMIT: + case VTPM_STATE_CANCEL: + default: + break; + } + return IRQ_HANDLED; +} + +static int setup_chip(struct device *dev, struct tpm_private *priv) +{ + struct tpm_chip *chip; + + chip = tpm_register_hardware(dev, &tpm_vtpm); + if (!chip) + return -ENODEV; + + init_waitqueue_head(&chip->vendor.read_queue); + + priv->chip = chip; + TPM_VPRIV(chip) = priv; + + return 0; +} + +/* caller must clean up in case of errors */ +static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv) +{ + struct xenbus_transaction xbt; + const char *message = NULL; + int rv; + + priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!priv->shr) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + + rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr)); + if (rv < 0) + return rv; + + priv->ring_ref = rv; + + rv = xenbus_alloc_evtchn(dev, &priv->evtchn); + if (rv) + return rv; + + rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0, + "tpmif", priv); + if (rv <= 0) { + xenbus_dev_fatal(dev, rv, "allocating TPM irq"); + return rv; + } + priv->chip->vendor.irq = rv; + + again: + rv = xenbus_transaction_start(&xbt); + if (rv) { + xenbus_dev_fatal(dev, rv, "starting transaction"); + return rv; + } + + rv = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", priv->ring_ref); + if (rv) { + message = "writing ring-ref"; + goto abort_transaction; + } + + rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + priv->evtchn); + if (rv) { + message = "writing event-channel"; + goto abort_transaction; + } + + rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1"); + if (rv) { + message = "writing feature-protocol-v2"; + goto abort_transaction; + } + + rv = xenbus_transaction_end(xbt, 0); + if (rv == -EAGAIN) + goto again; + if (rv) { + xenbus_dev_fatal(dev, rv, "completing transaction"); + return rv; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_error(dev, rv, "%s", message); + + return rv; +} + +static void ring_free(struct tpm_private *priv) +{ + if (!priv) + return; + + if (priv->ring_ref) + gnttab_end_foreign_access(priv->ring_ref, 0, + (unsigned long)priv->shr); + else + free_page((unsigned long)priv->shr); + + if (priv->chip && priv->chip->vendor.irq) + unbind_from_irqhandler(priv->chip->vendor.irq, priv); + + kfree(priv); +} + +static int tpmfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct tpm_private *priv; + int rv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure"); + return -ENOMEM; + } + + rv = setup_chip(&dev->dev, priv); + if (rv) { + kfree(priv); + return rv; + } + + rv = setup_ring(dev, priv); + if (rv) { + tpm_remove_hardware(&dev->dev); + ring_free(priv); + return rv; + } + + tpm_get_timeouts(priv->chip); + + dev_set_drvdata(&dev->dev, priv->chip); + + return rv; +} + +static int tpmfront_remove(struct xenbus_device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(&dev->dev); + struct tpm_private *priv = TPM_VPRIV(chip); + tpm_remove_hardware(&dev->dev); + ring_free(priv); + TPM_VPRIV(chip) = NULL; + return 0; +} + +static int tpmfront_resume(struct xenbus_device *dev) +{ + /* A suspend/resume/migrate will interrupt a vTPM anyway */ + tpmfront_remove(dev); + return tpmfront_probe(dev, NULL); +} + +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + int val; + + switch (backend_state) { + case XenbusStateInitialised: + case XenbusStateConnected: + if (dev->state == XenbusStateConnected) + break; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-protocol-v2", "%d", &val) < 0) + val = 0; + if (!val) { + xenbus_dev_fatal(dev, -EINVAL, + "vTPM protocol 2 required"); + return; + } + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + case XenbusStateClosed: + device_unregister(&dev->dev); + xenbus_frontend_closed(dev); + break; + default: + break; + } +} + +static const struct xenbus_device_id tpmfront_ids[] = { + { "vtpm" }, + { "" } +}; +MODULE_ALIAS("xen:vtpm"); + +static DEFINE_XENBUS_DRIVER(tpmfront, , + .probe = tpmfront_probe, + .remove = tpmfront_remove, + .resume = tpmfront_resume, + .otherend_changed = backend_changed, + ); + +static int __init xen_tpmfront_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + return xenbus_register_frontend(&tpmfront_driver); +} +module_init(xen_tpmfront_init); + +static void __exit xen_tpmfront_exit(void) +{ + xenbus_unregister_driver(&tpmfront_driver); +} +module_exit(xen_tpmfront_exit); + +MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>"); +MODULE_DESCRIPTION("Xen vTPM Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 2a2ef97697b2..3101cf6daf56 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -38,6 +38,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/errno.h> @@ -52,6 +53,7 @@ #include <linux/notifier.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/percpu-defs.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static DEFINE_PER_CPU(struct page *, balloon_scratch_page); + /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); @@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - __pte_ma(0), 0); + pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), + PAGE_KERNEL_RO), 0); BUG_ON(ret); } #endif @@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long p; + struct page *pg; + pg = __get_cpu_var(balloon_scratch_page); + p = page_to_pfn(pg); + __set_phys_to_machine(pfn, pfn_to_mfn(p)); + } balloon_append(pfn_to_page(pfn)); } @@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); } +struct page *get_balloon_scratch_page(void) +{ + struct page *ret = get_cpu_var(balloon_scratch_page); + BUG_ON(ret == NULL); + return ret; +} + +void put_balloon_scratch_page(void) +{ + put_cpu_var(balloon_scratch_page); +} + /* Resets the Xen limit, sets new target, and kicks off processing. */ void balloon_set_new_target(unsigned long target) { @@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn, } } +static int __cpuinit balloon_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_UP_PREPARE: + if (per_cpu(balloon_scratch_page, cpu) != NULL) + break; + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return NOTIFY_BAD; + } + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block balloon_cpu_notifier __cpuinitdata = { + .notifier_call = balloon_cpu_notify, +}; + static int __init balloon_init(void) { - int i; + int i, cpu; if (!xen_domain()) return -ENODEV; + for_each_online_cpu(cpu) + { + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return -ENOMEM; + } + } + register_cpu_notifier(&balloon_cpu_notifier); + pr_info("Initialising balloon driver\n"); balloon_stats.current_pages = xen_pv_domain() @@ -616,4 +673,15 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); +static int __init balloon_clear(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(balloon_scratch_page, cpu) = NULL; + + return 0; +} +early_initcall(balloon_clear); + MODULE_LICENSE("GPL"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 5e8be462aed5..4035e833ea26 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -56,6 +56,7 @@ #include <xen/interface/hvm/params.h> #include <xen/interface/physdev.h> #include <xen/interface/sched.h> +#include <xen/interface/vcpu.h> #include <asm/hw_irq.h> /* @@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { - int irq = per_cpu(ipi_to_irq, cpu)[vector]; + int irq; + +#ifdef CONFIG_X86 + if (unlikely(vector == XEN_NMI_VECTOR)) { + int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL); + if (rc < 0) + printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); + return; + } +#endif + irq = per_cpu(ipi_to_irq, cpu)[vector]; BUG_ON(irq < 0); notify_remote_via_irq(irq); } @@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void) pending_bits = active_evtchns(cpu, s, word_idx); bit_idx = 0; /* usually scan entire word from start */ + /* + * We scan the starting word in two parts. + * + * 1st time: start in the middle, scanning the + * upper bits. + * + * 2nd time: scan the whole word (not just the + * parts skipped in the first pass) -- if an + * event in the previously scanned bits is + * pending again it would just be scanned on + * the next loop anyway. + */ if (word_idx == start_word_idx) { - /* We scan the starting word in two parts */ if (i == 0) - /* 1st time: start in the middle */ bit_idx = start_bit_idx; - else - /* 2nd time: mask bits done already */ - bit_idx &= (1UL << start_bit_idx) - 1; } do { diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index b6165e047f48..8b3a69a06c39 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -57,6 +57,7 @@ struct per_user_data { struct mutex bind_mutex; /* serialize bind/unbind operations */ + struct rb_root evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) @@ -64,6 +65,7 @@ struct per_user_data { evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct mutex ring_cons_mutex; /* protect against concurrent readers */ + spinlock_t ring_prod_lock; /* product against concurrent interrupts */ /* Processes wait on this queue when ring is empty. */ wait_queue_head_t evtchn_wait; @@ -71,54 +73,79 @@ struct per_user_data { const char *name; }; -/* - * Who's bound to each port? This is logically an array of struct - * per_user_data *, but we encode the current enabled-state in bit 0. - */ -static unsigned long *port_user; -static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ +struct user_evtchn { + struct rb_node node; + struct per_user_data *user; + unsigned port; + bool enabled; +}; -static inline struct per_user_data *get_port_user(unsigned port) +static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { - return (struct per_user_data *)(port_user[port] & ~1); -} + struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; -static inline void set_port_user(unsigned port, struct per_user_data *u) -{ - port_user[port] = (unsigned long)u; + while (*new) { + struct user_evtchn *this; + + this = container_of(*new, struct user_evtchn, node); + + parent = *new; + if (this->port < evtchn->port) + new = &((*new)->rb_left); + else if (this->port > evtchn->port) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&evtchn->node, parent, new); + rb_insert_color(&evtchn->node, &u->evtchns); + + return 0; } -static inline bool get_port_enabled(unsigned port) +static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { - return port_user[port] & 1; + rb_erase(&evtchn->node, &u->evtchns); + kfree(evtchn); } -static inline void set_port_enabled(unsigned port, bool enabled) +static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) { - if (enabled) - port_user[port] |= 1; - else - port_user[port] &= ~1; + struct rb_node *node = u->evtchns.rb_node; + + while (node) { + struct user_evtchn *evtchn; + + evtchn = container_of(node, struct user_evtchn, node); + + if (evtchn->port < port) + node = node->rb_left; + else if (evtchn->port > port) + node = node->rb_right; + else + return evtchn; + } + return NULL; } static irqreturn_t evtchn_interrupt(int irq, void *data) { - unsigned int port = (unsigned long)data; - struct per_user_data *u; - - spin_lock(&port_user_lock); - - u = get_port_user(port); + struct user_evtchn *evtchn = data; + struct per_user_data *u = evtchn->user; - WARN(!get_port_enabled(port), + WARN(!evtchn->enabled, "Interrupt for port %d, but apparently not enabled; per-user %p\n", - port, u); + evtchn->port, u); disable_irq_nosync(irq); - set_port_enabled(port, false); + evtchn->enabled = false; + + spin_lock(&u->ring_prod_lock); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); @@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) } else u->ring_overflow = 1; - spin_unlock(&port_user_lock); + spin_unlock(&u->ring_prod_lock); return IRQ_HANDLED; } @@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, if (copy_from_user(kbuf, buf, count) != 0) goto out; - spin_lock_irq(&port_user_lock); + mutex_lock(&u->bind_mutex); for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { unsigned port = kbuf[i]; + struct user_evtchn *evtchn; - if (port < NR_EVENT_CHANNELS && - get_port_user(port) == u && - !get_port_enabled(port)) { - set_port_enabled(port, true); + evtchn = find_evtchn(u, port); + if (evtchn && !evtchn->enabled) { + evtchn->enabled = true; enable_irq(irq_from_evtchn(port)); } } - spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->bind_mutex); rc = count; @@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, static int evtchn_bind_to_user(struct per_user_data *u, int port) { + struct user_evtchn *evtchn; + struct evtchn_close close; int rc = 0; /* @@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) * interrupt handler yet, and our caller has already * serialized bind operations.) */ - BUG_ON(get_port_user(port) != NULL); - set_port_user(port, u); - set_port_enabled(port, true); /* start enabled */ + + evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); + if (!evtchn) + return -ENOMEM; + + evtchn->user = u; + evtchn->port = port; + evtchn->enabled = true; /* start enabled */ + + rc = add_evtchn(u, evtchn); + if (rc < 0) + goto err; rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, - u->name, (void *)(unsigned long)port); - if (rc >= 0) - rc = evtchn_make_refcounted(port); - else { - /* bind failed, should close the port now */ - struct evtchn_close close; - close.port = port; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); - set_port_user(port, NULL); - } + u->name, evtchn); + if (rc < 0) + goto err; + rc = evtchn_make_refcounted(port); + return rc; + +err: + /* bind failed, should close the port now */ + close.port = port; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + del_evtchn(u, evtchn); return rc; } -static void evtchn_unbind_from_user(struct per_user_data *u, int port) +static void evtchn_unbind_from_user(struct per_user_data *u, + struct user_evtchn *evtchn) { - int irq = irq_from_evtchn(port); + int irq = irq_from_evtchn(evtchn->port); BUG_ON(irq < 0); - unbind_from_irqhandler(irq, (void *)(unsigned long)port); + unbind_from_irqhandler(irq, evtchn); - set_port_user(port, NULL); + del_evtchn(u, evtchn); } static long evtchn_ioctl(struct file *file, @@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file, case IOCTL_EVTCHN_UNBIND: { struct ioctl_evtchn_unbind unbind; + struct user_evtchn *evtchn; rc = -EFAULT; if (copy_from_user(&unbind, uarg, sizeof(unbind))) @@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file, break; rc = -ENOTCONN; - if (get_port_user(unbind.port) != u) + evtchn = find_evtchn(u, unbind.port); + if (!evtchn) break; disable_irq(irq_from_evtchn(unbind.port)); - - evtchn_unbind_from_user(u, unbind.port); - + evtchn_unbind_from_user(u, evtchn); rc = 0; break; } case IOCTL_EVTCHN_NOTIFY: { struct ioctl_evtchn_notify notify; + struct user_evtchn *evtchn; rc = -EFAULT; if (copy_from_user(¬ify, uarg, sizeof(notify))) break; - if (notify.port >= NR_EVENT_CHANNELS) { - rc = -EINVAL; - } else if (get_port_user(notify.port) != u) { - rc = -ENOTCONN; - } else { + rc = -ENOTCONN; + evtchn = find_evtchn(u, notify.port); + if (evtchn) { notify_remote_via_evtchn(notify.port); rc = 0; } @@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file, case IOCTL_EVTCHN_RESET: { /* Initialise the ring to empty. Clear errors. */ mutex_lock(&u->ring_cons_mutex); - spin_lock_irq(&port_user_lock); + spin_lock_irq(&u->ring_prod_lock); u->ring_cons = u->ring_prod = u->ring_overflow = 0; - spin_unlock_irq(&port_user_lock); + spin_unlock_irq(&u->ring_prod_lock); mutex_unlock(&u->ring_cons_mutex); rc = 0; break; @@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); + spin_lock_init(&u->ring_prod_lock); filp->private_data = u; @@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp) static int evtchn_release(struct inode *inode, struct file *filp) { - int i; struct per_user_data *u = filp->private_data; + struct rb_node *node; - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - if (get_port_user(i) != u) - continue; + while ((node = u->evtchns.rb_node)) { + struct user_evtchn *evtchn; - disable_irq(irq_from_evtchn(i)); - evtchn_unbind_from_user(get_port_user(i), i); + evtchn = rb_entry(node, struct user_evtchn, node); + disable_irq(irq_from_evtchn(evtchn->port)); + evtchn_unbind_from_user(u, evtchn); } free_page((unsigned long)u->ring); @@ -523,12 +563,6 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; - port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); - if (port_user == NULL) - return -ENOMEM; - - spin_lock_init(&port_user_lock); - /* Create '/dev/xen/evtchn'. */ err = misc_register(&evtchn_miscdev); if (err != 0) { @@ -543,9 +577,6 @@ static int __init evtchn_init(void) static void __exit evtchn_cleanup(void) { - kfree(port_user); - port_user = NULL; - misc_deregister(&evtchn_miscdev); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index eab5427c75f5..e41c79c986ea 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map) * with find_grant_ptes. */ for (i = 0; i < map->count; i++) { - unsigned level; unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); - pte_t *ptep; - u64 pte_maddr = 0; BUG_ON(PageHighMem(map->pages[i])); - ptep = lookup_address(address, &level); - pte_maddr = arbitrary_virt_to_machine(ptep).maddr; - gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, - map->flags | - GNTMAP_host_map | - GNTMAP_contains_pte, + gnttab_set_map_op(&map->kmap_ops[i], address, + map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 04cdeb8e3719..c4d2298893b1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, u16 count) { unsigned long flags; + struct gnttab_free_callback *cb; + spin_lock_irqsave(&gnttab_list_lock, flags); - if (callback->next) - goto out; + + /* Check if the callback is already on the list */ + cb = gnttab_free_callback_list; + while (cb) { + if (cb == callback) + goto out; + cb = cb->next; + } + callback->fn = fn; callback->arg = arg; callback->count = count; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index f8e5dd701ecb..8e74590fa1bb 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -43,9 +43,10 @@ MODULE_LICENSE("GPL"); #define PRIV_VMA_LOCKED ((void *)1) -#ifndef HAVE_ARCH_PRIVCMD_MMAP -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); -#endif +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages); static long privcmd_ioctl_hypercall(void __user *udata) { @@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata) vma = find_vma(mm, msg->va); rc = -EINVAL; - if (!vma || (msg->va != vma->vm_start) || - !privcmd_enforce_singleshot_mapping(vma)) + if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data) goto out_up; + vma->vm_private_data = PRIV_VMA_LOCKED; } state.va = vma->vm_start; @@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) kfree(pages); return -ENOMEM; } - BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); + BUG_ON(vma->vm_private_data != NULL); vma->vm_private_data = pages; return 0; @@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) vma = find_vma(mm, m.addr); if (!vma || - vma->vm_ops != &privcmd_vm_ops || - (m.addr != vma->vm_start) || - ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || - !privcmd_enforce_singleshot_mapping(vma)) { - up_write(&mm->mmap_sem); + vma->vm_ops != &privcmd_vm_ops) { ret = -EINVAL; - goto out; + goto out_unlock; } - if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = alloc_empty_pages(vma, m.num); - if (ret < 0) { - up_write(&mm->mmap_sem); - goto out; + + /* + * Caller must either: + * + * Map the whole VMA range, which will also allocate all the + * pages required for the auto_translated_physmap case. + * + * Or + * + * Map unmapped holes left from a previous map attempt (e.g., + * because those foreign frames were previously paged out). + */ + if (vma->vm_private_data == NULL) { + if (m.addr != vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + ret = alloc_empty_pages(vma, m.num); + if (ret < 0) + goto out_unlock; + } else + vma->vm_private_data = PRIV_VMA_LOCKED; + } else { + if (m.addr < vma->vm_start || + m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) { + ret = -EINVAL; + goto out_unlock; + } + if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) { + ret = -EINVAL; + goto out_unlock; } } @@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) out: free_page_list(&pagelist); - return ret; + +out_unlock: + up_write(&mm->mmap_sem); + goto out; } static long privcmd_ioctl(struct file *file, @@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +/* + * For MMAPBATCH*. This allows asserting the singleshot mapping + * on a per pfn/pte basis. Mapping calls that fail with ENOENT + * can be then retried until success. + */ +static int is_mapped_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + return pte_none(*pte) ? 0 : -EBUSY; +} + +static int privcmd_vma_range_is_mapped( + struct vm_area_struct *vma, + unsigned long addr, + unsigned long nr_pages) { - return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); + return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT, + is_mapped_fn, NULL) != 0; } const struct file_operations xen_privcmd_fops = { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index aadffcf7db9b..1b2277c311d2 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, to do proper error handling. */ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); - sgl[0].dma_length = 0; + sg_dma_len(sgl) = 0; return DMA_ERROR_CODE; } sg->dma_address = xen_phys_to_bus(map); } else sg->dma_address = dev_addr; - sg->dma_length = sg->length; + sg_dma_len(sg) = sg->length; } return nelems; } @@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); @@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) xen_swiotlb_sync_single(hwdev, sg->dma_address, - sg->dma_length, dir, target); + sg_dma_len(sg), dir, target); } void diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 02817a85f877..21e18c18c7a1 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) + err = kstrtoul(buf, 10, &tmp); + if (err) + return err; + if ((tmp != 0) && (tmp != 1)) return -EINVAL; xen_selfballooning_enabled = !!tmp; @@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_interval = val; return count; @@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_downhysteresis = val; return count; @@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_uphysteresis = val; return count; @@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_min_usable_mb = val; return count; @@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; selfballoon_reserved_mb = val; return count; @@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &tmp); - if (err || ((tmp != 0) && (tmp != 1))) + err = kstrtoul(buf, 10, &tmp); + if (err) + return err; + if ((tmp != 0) && (tmp != 1)) return -EINVAL; frontswap_selfshrinking = !!tmp; if (!was_enabled && !xen_selfballooning_enabled && @@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; frontswap_inertia = val; frontswap_inertia_counter = val; @@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 10, &val); - if (err || val == 0) + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val == 0) return -EINVAL; frontswap_hysteresis = val; return count; |