From ec9b2065d4d3b797604c09a569083dd9ff951b1b Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:13 +0100 Subject: xen: Move manage.c to drivers/xen for ia64/xen support move arch/x86/xen/manage.c under drivers/xen/to share codes with x86 and ia64. ia64/xen also uses manage.c Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/Makefile | 2 +- drivers/xen/manage.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/manage.c (limited to 'drivers/xen') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 37af04f1ffd9..363286c54290 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += grant-table.o features.o events.o +obj-y += grant-table.o features.o events.o manage.o obj-y += xenbus/ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c new file mode 100644 index 000000000000..aa7af9e6abc0 --- /dev/null +++ b/drivers/xen/manage.c @@ -0,0 +1,143 @@ +/* + * Handle extern requests for shutdown, reboot and sysrq + */ +#include +#include +#include +#include + +#include + +#define SHUTDOWN_INVALID -1 +#define SHUTDOWN_POWEROFF 0 +#define SHUTDOWN_SUSPEND 2 +/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + * report a crash, not be instructed to crash! + * HALT is the same as POWEROFF, as far as we're concerned. The tools use + * the distinction when we return the reason code to them. + */ +#define SHUTDOWN_HALT 4 + +/* Ignore multiple shutdown requests. */ +static int shutting_down = SHUTDOWN_INVALID; + +static void shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + char *str; + struct xenbus_transaction xbt; + int err; + + if (shutting_down != SHUTDOWN_INVALID) + return; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); + /* Ignore read errors and empty reads. */ + if (XENBUS_IS_ERR_READ(str)) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) { + kfree(str); + goto again; + } + + if (strcmp(str, "poweroff") == 0 || + strcmp(str, "halt") == 0) + orderly_poweroff(false); + else if (strcmp(str, "reboot") == 0) + ctrl_alt_del(); + else { + printk(KERN_INFO "Ignoring shutdown request: %s\n", str); + shutting_down = SHUTDOWN_INVALID; + } + + kfree(str); +} + +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char sysrq_key = '\0'; + struct xenbus_transaction xbt; + int err; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { + printk(KERN_ERR "Unable to read sysrq code in " + "control/sysrq\n"); + xenbus_transaction_end(xbt, 1); + return; + } + + if (sysrq_key != '\0') + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + + if (sysrq_key != '\0') + handle_sysrq(sysrq_key, NULL); +} + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static struct xenbus_watch sysrq_watch = { + .node = "control/sysrq", + .callback = sysrq_handler +}; + +static int setup_shutdown_watcher(void) +{ + int err; + + err = register_xenbus_watch(&shutdown_watch); + if (err) { + printk(KERN_ERR "Failed to set shutdown watcher\n"); + return err; + } + + err = register_xenbus_watch(&sysrq_watch); + if (err) { + printk(KERN_ERR "Failed to set sysrq watcher\n"); + return err; + } + + return 0; +} + +static int shutdown_event(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + setup_shutdown_watcher(); + return NOTIFY_DONE; +} + +static int __init setup_shutdown_event(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = shutdown_event + }; + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(setup_shutdown_event); -- cgit v1.2.3 From a90971ebddc81330f59203dee9803512aa4e2ef6 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:14 +0100 Subject: xen: compilation fix to balloon driver for ia64 support fix compilation error of ballon driver on ia64. extent_start member is pointer argument. On x86 pointer argument for xen hypercall is passed as virtual address. On the other hand, ia64 and ppc, pointer argument is passed in pseudo physical address. (guest physicall address.) So they must be passed as handle and convert right before issuing hypercall. CC drivers/xen/balloon.o linux-2.6-x86/drivers/xen/balloon.c: In function 'increase_reservation': linux-2.6-x86/drivers/xen/balloon.c:228: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'decrease_reservation': linux-2.6-x86/drivers/xen/balloon.c:324: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'dealloc_pte_fn': linux-2.6-x86/drivers/xen/balloon.c:486: error: incompatible types in assignment linux-2.6-x86/drivers/xen/balloon.c: In function 'alloc_empty_pages_and_pagevec': linux-2.6-x86/drivers/xen/balloon.c:522: error: incompatible types in assignment make[2]: *** [drivers/xen/balloon.o] Error 1 Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/balloon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ab25ba6cbbb9..097ba02ac809 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages) page = balloon_next_page(page); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); @@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages) balloon_append(pfn_to_page(pfn)); } - reservation.extent_start = (unsigned long)frame_list; + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); BUG_ON(ret != nr_pages); @@ -483,7 +483,7 @@ static int dealloc_pte_fn( .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&mfn; + set_xen_guest_handle(reservation.extent_start, &mfn); set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); @@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages) .extent_order = 0, .domid = DOMID_SELF }; - reservation.extent_start = (unsigned long)&gmfn; + set_xen_guest_handle(reservation.extent_start, &gmfn); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); if (ret == 1) -- cgit v1.2.3 From 955d6f1778da5a9795f2dfb07f760006f194609a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 26 May 2008 23:31:17 +0100 Subject: xen: drivers/xen/balloon.c: make a function static Make the needlessly global balloon_set_new_target() static. Signed-off-by: Adrian Bunk Acked-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- drivers/xen/balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 097ba02ac809..591bc29b55f5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work) } /* Resets the Xen limit, sets new target, and kicks off processing. */ -void balloon_set_new_target(unsigned long target) +static void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.hard_limit = ~0UL; -- cgit v1.2.3 From eb1e305f4ef201e549ffd475b7dcbcd4ec36d7dc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:23 +0100 Subject: xen: add rebind_evtchn_irq Add rebind_evtchn_irq(), which will rebind an device driver's existing irq to a new event channel on restore. Since the new event channel will be masked and bound to vcpu0, we update the state accordingly and unmask the irq once everything is set up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/events.c | 27 +++++++++++++++++++++++++++ include/xen/events.h | 1 + 2 files changed, 28 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4f0f22b020ea..f64e9798129c 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -557,6 +557,33 @@ out: put_cpu(); } +/* Rebind a new event channel to an existing irq. */ +void rebind_evtchn_irq(int evtchn, int irq) +{ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, + so the bindcount should be non-0 */ + BUG_ON(irq_bindcount[irq] == 0); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + + spin_unlock(&irq_mapping_update_lock); + + /* new event channels are always bound to cpu 0 */ + irq_set_affinity(irq, cpumask_of_cpu(0)); + + /* Unmask the event channel. */ + enable_irq(irq); +} + /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { diff --git a/include/xen/events.h b/include/xen/events.h index acd8e062c85f..a82ec0c45c38 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); int resend_irq_on_evtchn(unsigned int irq); +void rebind_evtchn_irq(int evtchn, int irq); static inline void notify_remote_via_evtchn(int port) { -- cgit v1.2.3 From 0f2287ad7c61f10b2a22a06e2a66cdbbbfc44ad0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:24 +0100 Subject: xen: fix unbind_from_irq() Rearrange the tests in unbind_from_irq() so that we can still unbind an irq even if the underlying event channel is bad. This allows a device driver to shuffle its irqs on save/restore before the underlying event channels have been fixed up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index f64e9798129c..70375a690761 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq) spin_lock(&irq_mapping_update_lock); - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) BUG(); @@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq) evtchn_to_irq[evtchn] = -1; irq_info[irq] = IRQ_UNBOUND; - dynamic_irq_init(irq); + dynamic_irq_cleanup(irq); } spin_unlock(&irq_mapping_update_lock); -- cgit v1.2.3 From 7d88d32a4670af583c896e5ecd3929b78538ca62 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:26 +0100 Subject: xenbus: rebind irq on restore When restoring, rebind the existing xenbus irq to the new xenbus event channel. (It turns out in practice that this is always the same, and is never updated on restore. That's a bug, but Xeno-linux has been like this for a long time, so it can't really be fixed.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/xenbus/xenbus_comms.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 6efbe3f29ca5..090c61ee8fd0 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len) int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; - int err; if (intf->req_prod != intf->req_cons) printk(KERN_ERR "XENBUS request ring is not quiescent " @@ -216,18 +215,20 @@ int xb_init_comms(void) intf->rsp_cons = intf->rsp_prod; } - if (xenbus_irq) - unbind_from_irqhandler(xenbus_irq, &xb_waitq); + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } - err = bind_evtchn_to_irqhandler( - xen_store_evtchn, wake_waiting, - 0, "xenbus", &xb_waitq); - if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); - return err; + xenbus_irq = err; } - xenbus_irq = err; - return 0; } -- cgit v1.2.3 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 6 +-- arch/x86/xen/mmu.c | 46 +++++++++++++++++ arch/x86/xen/smp.c | 2 +- arch/x86/xen/suspend.c | 42 +++++++++++++++ arch/x86/xen/time.c | 8 +++ arch/x86/xen/xen-ops.h | 4 ++ drivers/xen/events.c | 83 +++++++++++++++++++++++++++++ drivers/xen/grant-table.c | 4 +- drivers/xen/manage.c | 126 ++++++++++++++++++++++++++++++++++++++++----- include/linux/page-flags.h | 1 + include/xen/events.h | 3 ++ include/xen/grant_table.h | 3 ++ include/xen/xen-ops.h | 9 ++++ 14 files changed, 318 insertions(+), 21 deletions(-) create mode 100644 arch/x86/xen/suspend.c (limited to 'drivers/xen') diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b119b6b103..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ce67dc8f36af..b94f63ac228b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -857,7 +857,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -894,7 +894,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ @@ -902,7 +902,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) } /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4740cda36563..e95955968ba3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -560,6 +560,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -617,6 +640,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 74ab8968c525..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..7620a16fe535 --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,42 @@ +#include + +#include +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + } + + xen_setup_shared_info(); +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..0bef256e5f2d 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,6 +572,14 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_time_suspend(void) +{ +} + +void xen_time_resume(void) +{ +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a1bc89a8f169..a0503acad664 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,6 +9,7 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); @@ -19,6 +20,7 @@ extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); @@ -59,6 +61,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 70375a690761..73d78dc9b875 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) return ret; } +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_VIRQ); + BUG_ON(irq_info[irq].index != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_IPI); + BUG_ON(irq_info[irq].index != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + + } +} + +void xen_irq_resume(void) +{ + unsigned int cpu, irq, evtchn; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq].evtchn = 0; /* zap event-channel binding */ + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } +} + static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", .mask = disable_dynirq, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41b909d..e9e11168616a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return 0; } -static int gnttab_resume(void) +int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); } -static int gnttab_suspend(void) +int gnttab_suspend(void) { arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index aa7af9e6abc0..ba85fa2cff33 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -5,21 +5,113 @@ #include #include #include +#include +#include #include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 +#include +#include +#include +#include + +#include +#include + +enum shutdown_state { + SHUTDOWN_INVALID = -1, + SHUTDOWN_POWEROFF = 0, + SHUTDOWN_SUSPEND = 2, + /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + report a crash, not be instructed to crash! + HALT is the same as POWEROFF, as far as we're concerned. The tools use + the distinction when we return the reason code to them. */ + SHUTDOWN_HALT = 4, +}; /* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; +static enum shutdown_state shutting_down = SHUTDOWN_INVALID; + +static int xen_suspend(void *data) +{ + int *cancelled = data; + + BUG_ON(!irqs_disabled()); + + load_cr3(swapper_pg_dir); + + xen_mm_pin_all(); + gnttab_suspend(); + xen_time_suspend(); + xen_pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled + * or the domain was merely checkpointed, and 0 if it + * is resuming in a new domain. + */ + *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + xen_post_suspend(*cancelled); + xen_time_resume(); + gnttab_resume(); + xen_mm_unpin_all(); + + if (!*cancelled) { + xen_irq_resume(); + xen_console_resume(); + } + + return 0; +} + +static void do_suspend(void) +{ + int err; + int cancelled = 1; + + shutting_down = SHUTDOWN_SUSPEND; + +#ifdef CONFIG_PREEMPT + /* If the kernel is preemptible, we need to freeze all the processes + to prevent them from being in the middle of a pagetable update + during suspend. */ + err = freeze_processes(); + if (err) { + printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + return; + } +#endif + + err = device_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + goto out; + } + + printk("suspending xenbus...\n"); + /* XXX use normal device tree? */ + xenbus_suspend(); + + err = stop_machine_run(xen_suspend, &cancelled, 0); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; + } + + if (!cancelled) + xenbus_resume(); + else + xenbus_suspend_cancel(); + + device_resume(); + + +out: +#ifdef CONFIG_PREEMPT + thaw_processes(); +#endif + shutting_down = SHUTDOWN_INVALID; +} static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -52,11 +144,17 @@ static void shutdown_handler(struct xenbus_watch *watch, } if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) + strcmp(str, "halt") == 0) { + shutting_down = SHUTDOWN_POWEROFF; orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) + } else if (strcmp(str, "reboot") == 0) { + shutting_down = SHUTDOWN_POWEROFF; /* ? */ ctrl_alt_del(); - else { +#ifdef CONFIG_PM_SLEEP + } else if (strcmp(str, "suspend") == 0) { + do_suspend(); +#endif + } else { printk(KERN_INFO "Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415d..02955a1c3d76 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) diff --git a/include/xen/events.h b/include/xen/events.h index a82ec0c45c38..67c4436554a9 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -41,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port) } extern void notify_remote_via_irq(int irq); + +extern void xen_irq_resume(void); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 466204846121..a40f1cd91be1 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -51,6 +51,9 @@ struct gnttab_free_callback { u16 count; }; +int gnttab_suspend(void); +int gnttab_resume(void); + int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 10ddfe0142d0..5d7a6db54a8c 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,4 +5,13 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +void xen_pre_suspend(void); +void xen_post_suspend(int suspend_cancelled); + +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); + +void xen_time_suspend(void); +void xen_time_resume(void); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From 359cdd3f866b6219a6729e313faf2221397f3278 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:28 +0100 Subject: xen: maintain clock offset over save/restore Hook into the device model to make sure that timekeeping's resume handler is called. This deals with our clocksource's non-monotonicity over the save/restore. Explicitly call clock_has_changed() to make sure that all the timers get retriggered properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/time.c | 8 -------- drivers/xen/manage.c | 15 ++++++++++++--- include/xen/xen-ops.h | 3 --- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/xen') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0bef256e5f2d..c39e1a5aa241 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,14 +572,6 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } -void xen_time_suspend(void) -{ -} - -void xen_time_resume(void) -{ -} - __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index ba85fa2cff33..675c3dd23962 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -34,14 +34,21 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; static int xen_suspend(void *data) { int *cancelled = data; + int err; BUG_ON(!irqs_disabled()); load_cr3(swapper_pg_dir); + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", + err); + return err; + } + xen_mm_pin_all(); gnttab_suspend(); - xen_time_suspend(); xen_pre_suspend(); /* @@ -52,10 +59,11 @@ static int xen_suspend(void *data) *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); xen_post_suspend(*cancelled); - xen_time_resume(); gnttab_resume(); xen_mm_unpin_all(); + device_power_up(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); @@ -105,7 +113,8 @@ static void do_suspend(void) device_resume(); - + /* Make sure timer events get retriggered on all CPUs */ + clock_was_set(); out: #ifdef CONFIG_PREEMPT thaw_processes(); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 5d7a6db54a8c..a706d6a78960 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -11,7 +11,4 @@ void xen_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); -void xen_time_suspend(void); -void xen_time_resume(void); - #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From c78277288e3d561d55fb48bc0fe8d6e2cf4d0880 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 29 May 2008 09:02:19 +0100 Subject: CONFIG_PM_SLEEP fix: xen: fix compilation when CONFIG_PM_SLEEP is disabled Xen save/restore depends on CONFIG_PM_SLEEP being set for device_power_up/down. Signed-off-by: Jeremy Fitzhardinge Acked-by: Randy Dunlap Signed-off-by: Ingo Molnar --- drivers/xen/manage.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 675c3dd23962..5b546e365f00 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -31,6 +31,7 @@ enum shutdown_state { /* Ignore multiple shutdown requests. */ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; +#ifdef CONFIG_PM_SLEEP static int xen_suspend(void *data) { int *cancelled = data; @@ -121,6 +122,7 @@ out: #endif shutting_down = SHUTDOWN_INVALID; } +#endif /* CONFIG_PM_SLEEP */ static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) -- cgit v1.2.3 From 6673cf63e5d973db5145d1f48b354efcb9fe2a13 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 16 Jun 2008 14:58:13 -0700 Subject: xen: Use wmb instead of rmb in xen_evtchn_do_upcall(). This patch is ported one from 534:77db69c38249 of linux-2.6.18-xen.hg. Use wmb instead of rmb to enforce ordering between evtchn_upcall_pending and evtchn_pending_sel stores in xen_evtchn_do_upcall(). Cc: Samuel Thibault Signed-off-by: Isaku Yamahata Cc: Nick Piggin Cc: the arch/x86 maintainers Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 73d78dc9b875..332dd63750a0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. */ - rmb(); + wmb(); #endif pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); while (pending_words != 0) { -- cgit v1.2.3