diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel.c | 103 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 180 | ||||
-rw-r--r-- | drivers/hv/connection.c | 35 | ||||
-rw-r--r-- | drivers/hv/hv.c | 34 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 82 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 13 | ||||
-rw-r--r-- | drivers/hv/hyperv_vmbus.h | 14 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 115 |
8 files changed, 403 insertions, 173 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 2978f5ee8d2a..da53180f5eb4 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -71,7 +71,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, struct vmbus_channel_msginfo *open_info = NULL; void *in, *out; unsigned long flags; - int ret, t, err = 0; + int ret, err = 0; + unsigned long t; spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { @@ -89,9 +90,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(send_ringbuffer_size + recv_ringbuffer_size)); - if (!out) - return -ENOMEM; - + if (!out) { + err = -ENOMEM; + goto error0; + } in = (void *)((unsigned long)out + send_ringbuffer_size); @@ -135,7 +137,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, GFP_KERNEL); if (!open_info) { err = -ENOMEM; - goto error0; + goto error_gpadl; } init_completion(&open_info->waitevent); @@ -151,7 +153,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, if (userdatalen > MAX_USER_DEFINED_BYTES) { err = -EINVAL; - goto error0; + goto error_gpadl; } if (userdatalen) @@ -195,10 +197,14 @@ error1: list_del(&open_info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +error_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + error0: free_pages((unsigned long)out, get_order(send_ringbuffer_size + recv_ringbuffer_size)); kfree(open_info); + newchannel->state = CHANNEL_OPEN_STATE; return err; } EXPORT_SYMBOL_GPL(vmbus_open); @@ -495,6 +501,15 @@ static int vmbus_close_internal(struct vmbus_channel *channel) put_cpu(); } + /* + * If the channel has been rescinded; process device removal. + */ + if (channel->rescind) { + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + return 0; + } + /* Send a closing message */ msg = &channel->close_msg.msg; @@ -569,23 +584,9 @@ void vmbus_close(struct vmbus_channel *channel) } EXPORT_SYMBOL_GPL(vmbus_close); -/** - * vmbus_sendpacket() - Send the specified buffer on the given channel - * @channel: Pointer to vmbus_channel structure. - * @buffer: Pointer to the buffer you want to receive the data into. - * @bufferlen: Maximum size of what the the buffer will hold - * @requestid: Identifier of the request - * @type: Type of packet that is being send e.g. negotiate, time - * packet etc. - * - * Sends data in @buffer directly to hyper-v via the vmbus - * This will send the data unparsed to hyper-v. - * - * Mainly used by Hyper-V drivers. - */ -int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, +int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u32 bufferlen, u64 requestid, - enum vmbus_packet_type type, u32 flags) + enum vmbus_packet_type type, u32 flags, bool kick_q) { struct vmpacket_descriptor desc; u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen; @@ -613,21 +614,49 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); - if (ret == 0 && signal) + if ((ret == 0) && kick_q && signal) vmbus_setevent(channel); return ret; } +EXPORT_SYMBOL(vmbus_sendpacket_ctl); + +/** + * vmbus_sendpacket() - Send the specified buffer on the given channel + * @channel: Pointer to vmbus_channel structure. + * @buffer: Pointer to the buffer you want to receive the data into. + * @bufferlen: Maximum size of what the the buffer will hold + * @requestid: Identifier of the request + * @type: Type of packet that is being send e.g. negotiate, time + * packet etc. + * + * Sends data in @buffer directly to hyper-v via the vmbus + * This will send the data unparsed to hyper-v. + * + * Mainly used by Hyper-V drivers. + */ +int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, + u32 bufferlen, u64 requestid, + enum vmbus_packet_type type, u32 flags) +{ + return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid, + type, flags, true); +} EXPORT_SYMBOL(vmbus_sendpacket); /* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. + * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer + * packets using a GPADL Direct packet type. This interface allows you + * to control notifying the host. This will be useful for sending + * batched data. Also the sender can control the send flags + * explicitly. */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, +int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) + u64 requestid, + u32 flags, + bool kick_q) { int ret; int i; @@ -655,7 +684,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, /* Setup the descriptor */ desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.flags = flags; desc.dataoffset8 = descsize >> 3; /* in 8-bytes grandularity */ desc.length8 = (u16)(packetlen_aligned >> 3); desc.transactionid = requestid; @@ -676,11 +705,27 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); - if (ret == 0 && signal) + if ((ret == 0) && kick_q && signal) vmbus_setevent(channel); return ret; } + +/* + * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer + * packets using a GPADL Direct packet type. + */ +int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, + struct hv_page_buffer pagebuffers[], + u32 pagecount, void *buffer, u32 bufferlen, + u64 requestid) +{ + u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount, + buffer, bufferlen, requestid, + flags, true); + +} EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); /* diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 3736f71bdec5..611789139f9b 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -134,24 +134,55 @@ fw_error: EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp); +static void vmbus_process_device_unregister(struct work_struct *work) +{ + struct device *dev; + struct vmbus_channel *channel = container_of(work, + struct vmbus_channel, + work); + + dev = get_device(&channel->device_obj->device); + if (dev) { + vmbus_device_unregister(channel->device_obj); + put_device(dev); + } +} + +static void vmbus_sc_creation_cb(struct work_struct *work) +{ + struct vmbus_channel *newchannel = container_of(work, + struct vmbus_channel, + work); + struct vmbus_channel *primary_channel = newchannel->primary_channel; + + /* + * On entry sc_creation_callback has been already verified to + * be non-NULL. + */ + primary_channel->sc_creation_callback(newchannel); +} + /* * alloc_channel - Allocate and initialize a vmbus channel object */ static struct vmbus_channel *alloc_channel(void) { + static atomic_t chan_num = ATOMIC_INIT(0); struct vmbus_channel *channel; channel = kzalloc(sizeof(*channel), GFP_ATOMIC); if (!channel) return NULL; + channel->id = atomic_inc_return(&chan_num); spin_lock_init(&channel->inbound_lock); spin_lock_init(&channel->lock); INIT_LIST_HEAD(&channel->sc_list); INIT_LIST_HEAD(&channel->percpu_list); - channel->controlwq = create_workqueue("hv_vmbus_ctl"); + channel->controlwq = alloc_workqueue("hv_vmbus_ctl/%d", WQ_MEM_RECLAIM, + 1, channel->id); if (!channel->controlwq) { kfree(channel); return NULL; @@ -204,33 +235,21 @@ static void percpu_channel_deq(void *arg) list_del(&channel->percpu_list); } -/* - * vmbus_process_rescind_offer - - * Rescind the offer by initiating a device removal - */ -static void vmbus_process_rescind_offer(struct work_struct *work) + +void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { - struct vmbus_channel *channel = container_of(work, - struct vmbus_channel, - work); + struct vmbus_channel_relid_released msg; unsigned long flags; struct vmbus_channel *primary_channel; - struct vmbus_channel_relid_released msg; - struct device *dev; - - if (channel->device_obj) { - dev = get_device(&channel->device_obj->device); - if (dev) { - vmbus_device_unregister(channel->device_obj); - put_device(dev); - } - } memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); - msg.child_relid = channel->offermsg.child_relid; + msg.child_relid = relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); + if (channel == NULL) + return; + if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -259,7 +278,6 @@ void vmbus_free_channels(void) list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { vmbus_device_unregister(channel->device_obj); - kfree(channel->device_obj); free_channel(channel); } } @@ -268,11 +286,8 @@ void vmbus_free_channels(void) * vmbus_process_offer - Process the offer by creating a channel/device * associated with this offer */ -static void vmbus_process_offer(struct work_struct *work) +static void vmbus_process_offer(struct vmbus_channel *newchannel) { - struct vmbus_channel *newchannel = container_of(work, - struct vmbus_channel, - work); struct vmbus_channel *channel; bool fnew = true; bool enq = false; @@ -335,10 +350,21 @@ static void vmbus_process_offer(struct work_struct *work) } newchannel->state = CHANNEL_OPEN_STATE; + channel->num_sc++; if (channel->sc_creation_callback != NULL) - channel->sc_creation_callback(newchannel); - - goto done_init_rescind; + /* + * We need to invoke the sub-channel creation + * callback; invoke this in a seperate work + * context since we are currently running on + * the global work context in which we handle + * messages from the host. + */ + INIT_WORK(&newchannel->work, + vmbus_sc_creation_cb); + queue_work(newchannel->controlwq, + &newchannel->work); + + return; } goto err_free_chan; @@ -361,7 +387,7 @@ static void vmbus_process_offer(struct work_struct *work) &newchannel->offermsg.offer.if_instance, newchannel); if (!newchannel->device_obj) - goto err_free_chan; + goto err_deq_chan; /* * Add the new device to the bus. This will kick off device-driver @@ -373,21 +399,26 @@ static void vmbus_process_offer(struct work_struct *work) pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - spin_lock_irqsave(&vmbus_connection.channel_lock, flags); - list_del(&newchannel->listentry); - spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); kfree(newchannel->device_obj); - goto err_free_chan; + goto err_deq_chan; } -done_init_rescind: - spin_lock_irqsave(&newchannel->lock, flags); - /* The next possible work is rescind handling */ - INIT_WORK(&newchannel->work, vmbus_process_rescind_offer); - /* Check if rescind offer was already received */ - if (newchannel->rescind) - queue_work(newchannel->controlwq, &newchannel->work); - spin_unlock_irqrestore(&newchannel->lock, flags); + return; + +err_deq_chan: + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + list_del(&newchannel->listentry); + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, + percpu_channel_deq, newchannel, true); + } else { + percpu_channel_deq(newchannel); + put_cpu(); + } + err_free_chan: free_channel(newchannel); } @@ -411,6 +442,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { { HV_SCSI_GUID, }, /* Network */ { HV_NIC_GUID, }, + /* NetworkDirect Guest RDMA */ + { HV_ND_GUID, }, }; @@ -511,8 +544,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) newchannel->monitor_grp = (u8)offer->monitorid / 32; newchannel->monitor_bit = (u8)offer->monitorid % 32; - INIT_WORK(&newchannel->work, vmbus_process_offer); - queue_work(newchannel->controlwq, &newchannel->work); + vmbus_process_offer(newchannel); } /* @@ -529,24 +561,28 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) rescind = (struct vmbus_channel_rescind_offer *)hdr; channel = relid2channel(rescind->child_relid); - if (channel == NULL) - /* Just return here, no channel found */ + if (channel == NULL) { + hv_process_channel_removal(NULL, rescind->child_relid); return; + } spin_lock_irqsave(&channel->lock, flags); channel->rescind = true; - /* - * channel->work.func != vmbus_process_rescind_offer means we are still - * processing offer request and the rescind offer processing should be - * postponed. It will be done at the very end of vmbus_process_offer() - * as rescind flag is being checked there. - */ - if (channel->work.func == vmbus_process_rescind_offer) - /* work is initialized for vmbus_process_rescind_offer() from - * vmbus_process_offer() where the channel got created */ - queue_work(channel->controlwq, &channel->work); - spin_unlock_irqrestore(&channel->lock, flags); + + if (channel->device_obj) { + /* + * We will have to unregister this device from the + * driver core. Do this in the per-channel work context. + * Note that we are currently executing on the global + * workq for handling messages from the host. + */ + INIT_WORK(&channel->work, vmbus_process_device_unregister); + queue_work(channel->controlwq, &channel->work); + } else { + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + } } /* @@ -787,7 +823,8 @@ int vmbus_request_offers(void) { struct vmbus_channel_message_header *msg; struct vmbus_channel_msginfo *msginfo; - int ret, t; + int ret; + unsigned long t; msginfo = kmalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_message_header), @@ -826,9 +863,8 @@ cleanup: /* * Retrieve the (sub) channel on which to send an outgoing request. - * When a primary channel has multiple sub-channels, we choose a - * channel whose VCPU binding is closest to the VCPU on which - * this call is being made. + * When a primary channel has multiple sub-channels, we try to + * distribute the load equally amongst all available channels. */ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) { @@ -836,11 +872,19 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) int cur_cpu; struct vmbus_channel *cur_channel; struct vmbus_channel *outgoing_channel = primary; - int cpu_distance, new_cpu_distance; + int next_channel; + int i = 1; if (list_empty(&primary->sc_list)) return outgoing_channel; + next_channel = primary->next_oc++; + + if (next_channel > (primary->num_sc)) { + primary->next_oc = 0; + return outgoing_channel; + } + cur_cpu = hv_context.vp_index[get_cpu()]; put_cpu(); list_for_each_safe(cur, tmp, &primary->sc_list) { @@ -851,18 +895,10 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) if (cur_channel->target_vp == cur_cpu) return cur_channel; - cpu_distance = ((outgoing_channel->target_vp > cur_cpu) ? - (outgoing_channel->target_vp - cur_cpu) : - (cur_cpu - outgoing_channel->target_vp)); - - new_cpu_distance = ((cur_channel->target_vp > cur_cpu) ? - (cur_channel->target_vp - cur_cpu) : - (cur_cpu - cur_channel->target_vp)); - - if (cpu_distance < new_cpu_distance) - continue; + if (i == next_channel) + return cur_channel; - outgoing_channel = cur_channel; + i++; } return outgoing_channel; diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index a63a795300b9..583d7d42b46d 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -216,10 +216,21 @@ int vmbus_connect(void) cleanup: pr_err("Unable to connect to host\n"); + vmbus_connection.conn_state = DISCONNECTED; + vmbus_disconnect(); + + kfree(msginfo); - if (vmbus_connection.work_queue) + return ret; +} + +void vmbus_disconnect(void) +{ + if (vmbus_connection.work_queue) { + drain_workqueue(vmbus_connection.work_queue); destroy_workqueue(vmbus_connection.work_queue); + } if (vmbus_connection.int_page) { free_pages((unsigned long)vmbus_connection.int_page, 0); @@ -230,10 +241,6 @@ cleanup: free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); vmbus_connection.monitor_pages[0] = NULL; vmbus_connection.monitor_pages[1] = NULL; - - kfree(msginfo); - - return ret; } /* @@ -311,10 +318,8 @@ static void process_chn_event(u32 relid) */ channel = pcpu_relid2channel(relid); - if (!channel) { - pr_err("channel not found for relid - %u\n", relid); + if (!channel) return; - } /* * A channel once created is persistent even when there @@ -349,10 +354,7 @@ static void process_chn_event(u32 relid) else bytes_to_read = 0; } while (read_state && (bytes_to_read != 0)); - } else { - pr_err("no channel callback for relid - %u\n", relid); } - } /* @@ -433,9 +435,16 @@ int vmbus_post_msg(void *buffer, size_t buflen) ret = hv_post_message(conn_id, 1, buffer, buflen); switch (ret) { + case HV_STATUS_INVALID_CONNECTION_ID: + /* + * We could get this if we send messages too + * frequently. + */ + ret = -EAGAIN; + break; + case HV_STATUS_INSUFFICIENT_MEMORY: case HV_STATUS_INSUFFICIENT_BUFFERS: ret = -ENOMEM; - case -ENOMEM: break; case HV_STATUS_SUCCESS: return ret; @@ -445,7 +454,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) } retries++; - msleep(100); + msleep(1000); } return ret; } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 50e51a51ff8b..d3943bceecc3 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -312,7 +312,11 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) dev->features = CLOCK_EVT_FEAT_ONESHOT; dev->cpumask = cpumask_of(cpu); dev->rating = 1000; - dev->owner = THIS_MODULE; + /* + * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will + * result in clockevents_config_and_register() taking additional + * references to the hv_vmbus module making it impossible to unload. + */ dev->set_mode = hv_ce_setmode; dev->set_next_event = hv_ce_set_next_event; @@ -470,6 +474,20 @@ void hv_synic_init(void *arg) } /* + * hv_synic_clockevents_cleanup - Cleanup clockevent devices + */ +void hv_synic_clockevents_cleanup(void) +{ + int cpu; + + if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)) + return; + + for_each_online_cpu(cpu) + clockevents_unbind_device(hv_context.clk_evt[cpu], cpu); +} + +/* * hv_synic_cleanup - Cleanup routine for hv_synic_init(). */ void hv_synic_cleanup(void *arg) @@ -477,11 +495,17 @@ void hv_synic_cleanup(void *arg) union hv_synic_sint shared_sint; union hv_synic_simp simp; union hv_synic_siefp siefp; + union hv_synic_scontrol sctrl; int cpu = smp_processor_id(); if (!hv_context.synic_initialized) return; + /* Turn off clockevent device */ + if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) + hv_ce_setmode(CLOCK_EVT_MODE_SHUTDOWN, + hv_context.clk_evt[cpu]); + rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.masked = 1; @@ -502,6 +526,10 @@ void hv_synic_cleanup(void *arg) wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); - free_page((unsigned long)hv_context.synic_message_page[cpu]); - free_page((unsigned long)hv_context.synic_event_page[cpu]); + /* Disable the global synic bit */ + rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + sctrl.enable = 0; + wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + + hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index ff169386b2c7..c5bb87244dd7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -503,6 +503,8 @@ struct hv_dynmem_device { * Number of pages we have currently ballooned out. */ unsigned int num_pages_ballooned; + unsigned int num_pages_onlined; + unsigned int num_pages_added; /* * State to manage the ballooning (up) operation. @@ -534,7 +536,6 @@ struct hv_dynmem_device { struct task_struct *thread; struct mutex ha_region_mutex; - struct completion waiter_event; /* * A list of hot-add regions. @@ -554,46 +555,32 @@ static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); #ifdef CONFIG_MEMORY_HOTPLUG -static void acquire_region_mutex(bool trylock) -{ - if (trylock) { - reinit_completion(&dm_device.waiter_event); - while (!mutex_trylock(&dm_device.ha_region_mutex)) - wait_for_completion(&dm_device.waiter_event); - } else { - mutex_lock(&dm_device.ha_region_mutex); - } -} - -static void release_region_mutex(bool trylock) -{ - if (trylock) { - mutex_unlock(&dm_device.ha_region_mutex); - } else { - mutex_unlock(&dm_device.ha_region_mutex); - complete(&dm_device.waiter_event); - } -} - static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { + struct memory_notify *mem = (struct memory_notify *)v; + switch (val) { case MEM_GOING_ONLINE: - acquire_region_mutex(true); + mutex_lock(&dm_device.ha_region_mutex); break; case MEM_ONLINE: + dm_device.num_pages_onlined += mem->nr_pages; case MEM_CANCEL_ONLINE: - release_region_mutex(true); + mutex_unlock(&dm_device.ha_region_mutex); if (dm_device.ha_waiting) { dm_device.ha_waiting = false; complete(&dm_device.ol_waitevent); } break; - case MEM_GOING_OFFLINE: case MEM_OFFLINE: + mutex_lock(&dm_device.ha_region_mutex); + dm_device.num_pages_onlined -= mem->nr_pages; + mutex_unlock(&dm_device.ha_region_mutex); + break; + case MEM_GOING_OFFLINE: case MEM_CANCEL_OFFLINE: break; } @@ -646,7 +633,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, init_completion(&dm_device.ol_waitevent); dm_device.ha_waiting = true; - release_region_mutex(false); + mutex_unlock(&dm_device.ha_region_mutex); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), (HA_CHUNK << PAGE_SHIFT)); @@ -675,7 +662,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, * have not been "onlined" within the allowed time. */ wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - acquire_region_mutex(false); + mutex_lock(&dm_device.ha_region_mutex); post_status(&dm_device); } @@ -886,7 +873,7 @@ static void hot_add_req(struct work_struct *dummy) resp.hdr.size = sizeof(struct dm_hot_add_response); #ifdef CONFIG_MEMORY_HOTPLUG - acquire_region_mutex(false); + mutex_lock(&dm_device.ha_region_mutex); pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; @@ -918,7 +905,9 @@ static void hot_add_req(struct work_struct *dummy) if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, rg_start, rg_sz); - release_region_mutex(false); + + dm->num_pages_added += resp.page_count; + mutex_unlock(&dm_device.ha_region_mutex); #endif /* * The result field of the response structure has the @@ -1031,17 +1020,21 @@ static void post_status(struct hv_dynmem_device *dm) status.hdr.trans_id = atomic_inc_return(&trans_id); /* - * The host expects the guest to report free memory. - * Further, the host expects the pressure information to - * include the ballooned out pages. - * For a given amount of memory that we are managing, we - * need to compute a floor below which we should not balloon. - * Compute this and add it to the pressure report. + * The host expects the guest to report free and committed memory. + * Furthermore, the host expects the pressure information to include + * the ballooned out pages. For a given amount of memory that we are + * managing we need to compute a floor below which we should not + * balloon. Compute this and add it to the pressure report. + * We also need to report all offline pages (num_pages_added - + * num_pages_onlined) as committed to the host, otherwise it can try + * asking us to balloon them out. */ status.num_avail = val.freeram; status.num_committed = vm_memory_committed() + - dm->num_pages_ballooned + - compute_balloon_floor(); + dm->num_pages_ballooned + + (dm->num_pages_added > dm->num_pages_onlined ? + dm->num_pages_added - dm->num_pages_onlined : 0) + + compute_balloon_floor(); /* * If our transaction ID is no longer current, just don't @@ -1145,6 +1138,8 @@ static void balloon_up(struct work_struct *dummy) bool alloc_error; bool done = false; int i; + struct sysinfo val; + unsigned long floor; /* The host balloons pages in 2M granularity. */ WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); @@ -1155,6 +1150,15 @@ static void balloon_up(struct work_struct *dummy) */ alloc_unit = 512; + si_meminfo(&val); + floor = compute_balloon_floor(); + + /* Refuse to balloon below the floor, keep the 2M granularity. */ + if (val.freeram - num_pages < floor) { + num_pages = val.freeram > floor ? (val.freeram - floor) : 0; + num_pages -= num_pages % PAGES_IN_2M; + } + while (!done) { bl_resp = (struct dm_balloon_response *)send_buffer; memset(send_buffer, 0, PAGE_SIZE); @@ -1414,7 +1418,8 @@ static void balloon_onchannelcallback(void *context) static int balloon_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - int ret, t; + int ret; + unsigned long t; struct dm_version_request version_req; struct dm_capabilities cap_msg; @@ -1439,7 +1444,6 @@ static int balloon_probe(struct hv_device *dev, dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); - init_completion(&dm_device.waiter_event); INIT_LIST_HEAD(&dm_device.ha_region_list); mutex_init(&dm_device.ha_region_mutex); INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 3b9c9ef0deb8..7994ec2e4151 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -340,12 +340,8 @@ static int util_probe(struct hv_device *dev, set_channel_read_state(dev->channel, false); - ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, - srv->util_cb, dev->channel); - if (ret) - goto error; - hv_set_drvdata(dev, srv); + /* * Based on the host; initialize the framework and * service version numbers we will negotiate. @@ -365,6 +361,11 @@ static int util_probe(struct hv_device *dev, hb_srv_version = HB_VERSION; } + ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, + srv->util_cb, dev->channel); + if (ret) + goto error; + return 0; error: @@ -379,9 +380,9 @@ static int util_remove(struct hv_device *dev) { struct hv_util_service *srv = hv_get_drvdata(dev); - vmbus_close(dev->channel); if (srv->util_deinit) srv->util_deinit(); + vmbus_close(dev->channel); kfree(srv->recv_buffer); return 0; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 44b1c9424712..88af4ec559c4 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -49,6 +49,17 @@ enum hv_cpuid_function { HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, }; +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 + +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 + +#define HV_CRASH_CTL_CRASH_NOTIFY 0x8000000000000000 + /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) @@ -572,6 +583,8 @@ extern void hv_synic_init(void *irqarg); extern void hv_synic_cleanup(void *arg); +extern void hv_synic_clockevents_cleanup(void); + /* * Host version information. */ @@ -692,6 +705,7 @@ void vmbus_free_channels(void); /* Connection interface */ int vmbus_connect(void); +void vmbus_disconnect(void); int vmbus_post_msg(void *buffer, size_t buflen); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index f518b8d7a5b5..8313e25378cb 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -33,9 +33,12 @@ #include <linux/hyperv.h> #include <linux/kernel_stat.h> #include <linux/clockchips.h> +#include <linux/cpu.h> #include <asm/hyperv.h> #include <asm/hypervisor.h> #include <asm/mshyperv.h> +#include <linux/notifier.h> +#include <linux/ptrace.h> #include "hyperv_vmbus.h" static struct acpi_device *hv_acpi_dev; @@ -44,6 +47,31 @@ static struct tasklet_struct msg_dpc; static struct completion probe_event; static int irq; + +static int hyperv_panic_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct pt_regs *regs; + + regs = current_pt_regs(); + + wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_panic_block = { + .notifier_call = hyperv_panic_event, +}; + struct resource hyperv_mmio = { .name = "hyperv mmio", .flags = IORESOURCE_MEM, @@ -507,14 +535,26 @@ static int vmbus_probe(struct device *child_device) */ static int vmbus_remove(struct device *child_device) { - struct hv_driver *drv = drv_to_hv_drv(child_device->driver); + struct hv_driver *drv; struct hv_device *dev = device_to_hv_device(child_device); - - if (drv->remove) - drv->remove(dev); - else - pr_err("remove not set for driver %s\n", - dev_name(child_device)); + u32 relid = dev->channel->offermsg.child_relid; + + if (child_device->driver) { + drv = drv_to_hv_drv(child_device->driver); + if (drv->remove) + drv->remove(dev); + else { + hv_process_channel_removal(dev->channel, relid); + pr_err("remove not set for driver %s\n", + dev_name(child_device)); + } + } else { + /* + * We don't have a driver for this device; deal with the + * rescind message by removing the channel. + */ + hv_process_channel_removal(dev->channel, relid); + } return 0; } @@ -573,6 +613,10 @@ static void vmbus_onmessage_work(struct work_struct *work) { struct onmessage_work_context *ctx; + /* Do not process messages if we're in DISCONNECTED state */ + if (vmbus_connection.conn_state == DISCONNECTED) + return; + ctx = container_of(work, struct onmessage_work_context, work); vmbus_onmessage(&ctx->msg); @@ -704,6 +748,39 @@ static void vmbus_isr(void) } } +#ifdef CONFIG_HOTPLUG_CPU +static int hyperv_cpu_disable(void) +{ + return -ENOSYS; +} + +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ + static void *previous_cpu_disable; + + /* + * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, + * ...) is not supported at this moment as channel interrupts are + * distributed across all of them. + */ + + if ((vmbus_proto_version == VERSION_WS2008) || + (vmbus_proto_version == VERSION_WIN7)) + return; + + if (vmbus_loaded) { + previous_cpu_disable = smp_ops.cpu_disable; + smp_ops.cpu_disable = hyperv_cpu_disable; + pr_notice("CPU offlining is not supported by hypervisor\n"); + } else if (previous_cpu_disable) + smp_ops.cpu_disable = previous_cpu_disable; +} +#else +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ +} +#endif + /* * vmbus_bus_init -Main vmbus driver initialization routine. * @@ -744,6 +821,16 @@ static int vmbus_bus_init(int irq) if (ret) goto err_alloc; + hv_cpu_hotplug_quirk(true); + + /* + * Only register if the crash MSRs are available + */ + if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_block); + } + vmbus_request_offers(); return 0; @@ -840,10 +927,8 @@ int vmbus_device_register(struct hv_device *child_device_obj) { int ret = 0; - static atomic_t device_num = ATOMIC_INIT(0); - - dev_set_name(&child_device_obj->device, "vmbus_0_%d", - atomic_inc_return(&device_num)); + dev_set_name(&child_device_obj->device, "vmbus_%d", + child_device_obj->channel->id); child_device_obj->device.bus = &hv_bus; child_device_obj->device.parent = &hv_acpi_dev->dev; @@ -992,11 +1077,19 @@ cleanup: static void __exit vmbus_exit(void) { + int cpu; + + vmbus_connection.conn_state = DISCONNECTED; + hv_synic_clockevents_cleanup(); hv_remove_vmbus_irq(); vmbus_free_channels(); bus_unregister(&hv_bus); hv_cleanup(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); acpi_bus_unregister_driver(&vmbus_acpi_driver); + hv_cpu_hotplug_quirk(false); + vmbus_disconnect(); } |