From c4c303c7c5679b4b368e12f41124aee29c325b76 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 25 Aug 2011 18:30:11 +0200 Subject: xen/pv-on-hvm kexec: prevent crash in xenwatch_thread() when stale watch events arrive During repeated kexec boots xenwatch_thread() can crash because xenbus_watch->callback is cleared by xenbus_watch_path() if a node/token combo for a new watch happens to match an already registered watch from an old kernel. In this case xs_watch returns -EEXISTS, then register_xenbus_watch() does not remove the to-be-registered watch from the list of active watches but returns the -EEXISTS to the caller anyway. Because the watch is still active in xenstored it will cause an event which will arrive in the new kernel. process_msg() will find the encapsulated struct xenbus_watch in its list of registered watches and puts the "empty" watch handle in the queue for xenwatch_thread(). xenwatch_thread() then calls ->callback which was cleared earlier by xenbus_watch_path(). To prevent that crash in a guest running on an old xen toolstack remove the special -EEXIST handling. v2: - remove the EEXIST handing in register_xenbus_watch() instead of checking for ->callback in process_msg() Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Olaf Hering --- drivers/xen/xenbus/xenbus_xs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 5534690075af..46347a49c5b8 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -638,8 +638,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. */ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); -- cgit v1.2.3 From 116df6f004af81925dcaa90d4a3b76da6b009427 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 25 Aug 2011 18:34:45 +0200 Subject: xen/pv-on-hvm kexec+kdump: reset PV devices in kexec or crash kernel After triggering a crash dump in a HVM guest, the PV backend drivers will remain in Connected state. When the kdump kernel starts the PV drivers will skip such devices. As a result, no root device is found and the vmcore cant be saved. A similar situation happens after a kexec boot, here the devices will be in the Closed state. With this change all frontend devices with state XenbusStateConnected or XenbusStateClosed will be reset by changing the state file to Closing -> Closed -> Initializing. This will trigger a disconnect in the backend drivers. Now the frontend drivers will find the backend drivers in state Initwait and can connect. Signed-off-by: Olaf Hering [v2: - add timeout when waiting for backend state change (based on feedback from Ian Campell) - extent printk message to include backend string - add comment to fall-through case in xenbus_reset_frontend] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_comms.c | 4 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 121 +++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61ee8fd0..2eff7a6aaa20 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index b6a2690c9d49..b521ce43d325 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -252,10 +252,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, } EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + printk(KERN_INFO "XENBUS: backend %s timed out.\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + printk(KERN_INFO "XENBUS: reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data) { + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); -- cgit v1.2.3 From ddacf5ef684a655abe2bb50c4b2a5b72ae0d5e05 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 22 Sep 2011 16:14:49 +0200 Subject: xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel Add new xs_reset_watches function to shutdown watches from old kernel after kexec boot. The old kernel does not unregister all watches in the shutdown path. They are still active, the double registration can not be detected by the new kernel. When the watches fire, unexpected events will arrive and the xenwatch thread will crash (jumps to NULL). An orderly reboot of a hvm guest will destroy the entire guest with all its resources (including the watches) before it is rebuilt from scratch, so the missing unregister is not an issue in that case. With this change the xenstored is instructed to wipe all active watches for the guest. However, a patch for xenstored is required so that it accepts the XS_RESET_WATCHES request from a client (see changeset 23839:42a45baf037d in xen-unstable.hg). Without the patch for xenstored the registration of watches will fail and some features of a PVonHVM guest are not available. The guest is still able to boot, but repeated kexec boots will fail. [v5: use xs_single instead of passing a dummy string to xs_talkv] [v4: ignore -EEXIST in xs_reset_watches] [v3: use XS_RESET_WATCHES instead of XS_INTRODUCE] [v2: move all code which deals with XS_INTRODUCE into xs_introduce() (based on feedback from Ian Campbell); remove casts from kvec assignment] Signed-off-by: Olaf Hering [v1: Redid the git description a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 13 +++++++++++++ include/xen/interface/io/xs_wire.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 46347a49c5b8..d1071de11878 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -620,6 +620,15 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +static void xs_reset_watches(void) +{ + int err; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); +} + /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -896,5 +905,9 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + if (xen_hvm_domain()) + xs_reset_watches(); + return 0; } diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index f6f07aa35af5..f0b6890370be 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -29,7 +29,8 @@ enum xsd_sockmsg_type XS_IS_DOMAIN_INTRODUCED, XS_RESUME, XS_SET_TARGET, - XS_RESTRICT + XS_RESTRICT, + XS_RESET_WATCHES }; #define XS_WRITE_NONE "NONE" -- cgit v1.2.3 From 5b25d89e19be2ff2fa7a5c80099e88fa56d66334 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 26 Sep 2011 13:13:42 -0400 Subject: xen/pv-on-hvm:kexec: Fix implicit declaration of function 'xen_hvm_domain' Randy found a compile error when using make randconfig to trigger drivers/xen/xenbus/xenbus_xs.c:909:2: error: implicit declaration of function 'xen_hvm_domain' it is unclear which of the CONFIG options triggered this. This patch fixes the error. Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d1071de11878..b3b8f2f3ad10 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "xenbus_comms.h" struct xs_stored_msg { -- cgit v1.2.3 From 77447991b6c9aef83d101aae4a9e5d83c206b9c5 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 13 Oct 2011 16:07:07 -0400 Subject: xenbus: Fix loopback event channel assuming domain 0 The xenbus event channel established in xenbus_init is intended to be a loopback channel, but the remote domain was hardcoded to 0; this will cause the channel to be unusable when xenstore is not being run in domain 0. Signed-off-by: Daniel De Graaf Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e33..d5347fe15882 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -724,7 +724,7 @@ static int __init xenbus_init(void) /* Next allocate a local port which xenstored can bind to */ alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; + alloc_unbound.remote_dom = DOMID_SELF; err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); -- cgit v1.2.3 From e4184aaf3b2c4f2b69306f6cfc4bab8733c6c5f1 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 13 Oct 2011 16:07:08 -0400 Subject: xenbus: don't rely on xen_initial_domain to detect local xenstore The xenstore daemon does not have to run in the xen initial domain; however, Linux currently uses xen_initial_domain to test if a loopback event channel should be used instead of the event channel provided in Xen's start_info structure. Instead, if the event channel passed in the start_info structure is not valid, assume that this domain will run xenstored locally and set up the event channel. Signed-off-by: Daniel De Graaf Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 101 ++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 48 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d5347fe15882..13b0f05bafb7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -696,64 +696,74 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - err = -ENODEV; - if (!xen_domain()) - return err; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - goto out_error; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = DOMID_SELF; + return 0; - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) - goto out_error; + out_err: + if (page != 0) + free_page(page); + return err; +} - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; +static int __init xenbus_init(void) +{ + int err = 0; - xen_store_interface = mfn_to_virt(xen_store_mfn); + if (!xen_domain()) + return -ENODEV; + + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) + xenstored_ready = 1; + else { + err = xenstored_local_init(); if (err) goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); - xenstored_ready = 1; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. */ @@ -772,12 +782,7 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif - return 0; - - out_error: - if (page != 0) - free_page(page); - + out_error: return err; } -- cgit v1.2.3