summaryrefslogtreecommitdiff
path: root/drivers/hv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv')
-rw-r--r--drivers/hv/channel.c36
-rw-r--r--drivers/hv/channel_mgmt.c262
-rw-r--r--drivers/hv/connection.c20
-rw-r--r--drivers/hv/hv.c36
-rw-r--r--drivers/hv/hv_fcopy.c2
-rw-r--r--drivers/hv/hv_kvp.c2
-rw-r--r--drivers/hv/hv_snapshot.c2
-rw-r--r--drivers/hv/hv_util.c1
-rw-r--r--drivers/hv/hv_utils_transport.c3
-rw-r--r--drivers/hv/hyperv_vmbus.h42
-rw-r--r--drivers/hv/ring_buffer.c31
-rw-r--r--drivers/hv/vmbus_drv.c117
12 files changed, 377 insertions, 177 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 1161d68a1863..56dd261f7142 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -219,6 +219,21 @@ error0:
}
EXPORT_SYMBOL_GPL(vmbus_open);
+/* Used for Hyper-V Socket: a guest client's connect() to the host */
+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
+ const uuid_le *shv_host_servie_id)
+{
+ struct vmbus_channel_tl_connect_request conn_msg;
+
+ memset(&conn_msg, 0, sizeof(conn_msg));
+ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST;
+ conn_msg.guest_endpoint_id = *shv_guest_servie_id;
+ conn_msg.host_service_id = *shv_host_servie_id;
+
+ return vmbus_post_msg(&conn_msg, sizeof(conn_msg));
+}
+EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
+
/*
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
@@ -624,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
u64 aligned_data = 0;
int ret;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
int num_vecs = ((bufferlen != 0) ? 3 : 1);
@@ -643,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
- &signal);
+ &signal, lock);
/*
* Signalling the host is conditional on many factors:
@@ -659,6 +675,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
* If we cannot write to the ring-buffer; signal the host
* even if we may not have written anything. This is a rare
* enough condition that it should not matter.
+ * NOTE: in this case, the hvsock channel is an exception, because
+ * it looks the host side's hvsock implementation has a throttling
+ * mechanism which can hurt the performance otherwise.
*/
if (channel->signal_policy)
@@ -666,7 +685,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
else
kick_q = true;
- if (((ret == 0) && kick_q && signal) || (ret))
+ if (((ret == 0) && kick_q && signal) ||
+ (ret && !is_hvsock_channel(channel)))
vmbus_setevent(channel);
return ret;
@@ -719,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
if (pagecount > MAX_PAGE_BUFFER_COUNT)
return -EINVAL;
@@ -755,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
/*
* Signalling the host is conditional on many factors:
@@ -818,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
packetlen = desc_size + bufferlen;
packetlen_aligned = ALIGN(packetlen, sizeof(u64));
@@ -837,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
if (ret == 0 && signal)
vmbus_setevent(channel);
@@ -862,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
multi_pagebuffer->len);
@@ -900,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
if (ret == 0 && signal)
vmbus_setevent(channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 1c1ad47042c5..38b682bab85a 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -28,12 +28,127 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
+#include <linux/delay.h>
#include <linux/hyperv.h>
#include "hyperv_vmbus.h"
-static void init_vp_index(struct vmbus_channel *channel,
- const uuid_le *type_guid);
+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
+
+static const struct vmbus_device vmbus_devs[] = {
+ /* IDE */
+ { .dev_type = HV_IDE,
+ HV_IDE_GUID,
+ .perf_device = true,
+ },
+
+ /* SCSI */
+ { .dev_type = HV_SCSI,
+ HV_SCSI_GUID,
+ .perf_device = true,
+ },
+
+ /* Fibre Channel */
+ { .dev_type = HV_FC,
+ HV_SYNTHFC_GUID,
+ .perf_device = true,
+ },
+
+ /* Synthetic NIC */
+ { .dev_type = HV_NIC,
+ HV_NIC_GUID,
+ .perf_device = true,
+ },
+
+ /* Network Direct */
+ { .dev_type = HV_ND,
+ HV_ND_GUID,
+ .perf_device = true,
+ },
+
+ /* PCIE */
+ { .dev_type = HV_PCIE,
+ HV_PCIE_GUID,
+ .perf_device = true,
+ },
+
+ /* Synthetic Frame Buffer */
+ { .dev_type = HV_FB,
+ HV_SYNTHVID_GUID,
+ .perf_device = false,
+ },
+
+ /* Synthetic Keyboard */
+ { .dev_type = HV_KBD,
+ HV_KBD_GUID,
+ .perf_device = false,
+ },
+
+ /* Synthetic MOUSE */
+ { .dev_type = HV_MOUSE,
+ HV_MOUSE_GUID,
+ .perf_device = false,
+ },
+
+ /* KVP */
+ { .dev_type = HV_KVP,
+ HV_KVP_GUID,
+ .perf_device = false,
+ },
+
+ /* Time Synch */
+ { .dev_type = HV_TS,
+ HV_TS_GUID,
+ .perf_device = false,
+ },
+
+ /* Heartbeat */
+ { .dev_type = HV_HB,
+ HV_HEART_BEAT_GUID,
+ .perf_device = false,
+ },
+
+ /* Shutdown */
+ { .dev_type = HV_SHUTDOWN,
+ HV_SHUTDOWN_GUID,
+ .perf_device = false,
+ },
+
+ /* File copy */
+ { .dev_type = HV_FCOPY,
+ HV_FCOPY_GUID,
+ .perf_device = false,
+ },
+
+ /* Backup */
+ { .dev_type = HV_BACKUP,
+ HV_VSS_GUID,
+ .perf_device = false,
+ },
+
+ /* Dynamic Memory */
+ { .dev_type = HV_DM,
+ HV_DM_GUID,
+ .perf_device = false,
+ },
+
+ /* Unknown GUID */
+ { .dev_type = HV_UNKOWN,
+ .perf_device = false,
+ },
+};
+
+static u16 hv_get_dev_type(const uuid_le *guid)
+{
+ u16 i;
+
+ for (i = HV_IDE; i < HV_UNKOWN; i++) {
+ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
+ return i;
+ }
+ pr_info("Unknown GUID: %pUl\n", guid);
+ return i;
+}
/**
* vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
@@ -144,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
channel->id = atomic_inc_return(&chan_num);
+ channel->acquire_ring_lock = true;
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
@@ -195,6 +311,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
vmbus_release_relid(relid);
BUG_ON(!channel->rescind);
+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
if (channel->target_cpu != get_cpu()) {
put_cpu();
@@ -206,9 +323,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
}
if (channel->primary_channel == NULL) {
- mutex_lock(&vmbus_connection.channel_mutex);
list_del(&channel->listentry);
- mutex_unlock(&vmbus_connection.channel_mutex);
primary_channel = channel;
} else {
@@ -251,6 +366,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
struct vmbus_channel *channel;
bool fnew = true;
unsigned long flags;
+ u16 dev_type;
+ int ret;
/* Make sure this is a new offer */
mutex_lock(&vmbus_connection.channel_mutex);
@@ -288,7 +405,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
goto err_free_chan;
}
- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);
+ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+
+ init_vp_index(newchannel, dev_type);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
@@ -325,12 +444,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
if (!newchannel->device_obj)
goto err_deq_chan;
+ newchannel->device_obj->device_id = dev_type;
/*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
* method.
*/
- if (vmbus_device_register(newchannel->device_obj) != 0) {
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_device_register(newchannel->device_obj);
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ if (ret != 0) {
pr_err("unable to add child device object (relid %d)\n",
newchannel->offermsg.child_relid);
kfree(newchannel->device_obj);
@@ -358,37 +482,6 @@ err_free_chan:
free_channel(newchannel);
}
-enum {
- IDE = 0,
- SCSI,
- FC,
- NIC,
- ND_NIC,
- PCIE,
- MAX_PERF_CHN,
-};
-
-/*
- * This is an array of device_ids (device types) that are performance critical.
- * We attempt to distribute the interrupt load for these devices across
- * all available CPUs.
- */
-static const struct hv_vmbus_device_id hp_devs[] = {
- /* IDE */
- { HV_IDE_GUID, },
- /* Storage - SCSI */
- { HV_SCSI_GUID, },
- /* Storage - FC */
- { HV_SYNTHFC_GUID, },
- /* Network */
- { HV_NIC_GUID, },
- /* NetworkDirect Guest RDMA */
- { HV_ND_GUID, },
- /* PCI Express Pass Through */
- { HV_PCIE_GUID, },
-};
-
-
/*
* We use this state to statically distribute the channel interrupt load.
*/
@@ -405,22 +498,15 @@ static int next_numa_node_id;
* For pre-win8 hosts or non-performance critical channels we assign the
* first CPU in the first NUMA node.
*/
-static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
u32 cur_cpu;
- int i;
- bool perf_chn = false;
+ bool perf_chn = vmbus_devs[dev_type].perf_device;
struct vmbus_channel *primary = channel->primary_channel;
int next_node;
struct cpumask available_mask;
struct cpumask *alloced_mask;
- for (i = IDE; i < MAX_PERF_CHN; i++) {
- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) {
- perf_chn = true;
- break;
- }
- }
if ((vmbus_proto_version == VERSION_WS2008) ||
(vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
/*
@@ -469,6 +555,17 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui
cpumask_of_node(primary->numa_node));
cur_cpu = -1;
+
+ /*
+ * Normally Hyper-V host doesn't create more subchannels than there
+ * are VCPUs on the node but it is possible when not all present VCPUs
+ * on the node are initialized by guest. Clear the alloced_cpus_in_node
+ * to start over.
+ */
+ if (cpumask_equal(&primary->alloced_cpus_in_node,
+ cpumask_of_node(primary->numa_node)))
+ cpumask_clear(&primary->alloced_cpus_in_node);
+
while (true) {
cur_cpu = cpumask_next(cur_cpu, &available_mask);
if (cur_cpu >= nr_cpu_ids) {
@@ -498,6 +595,32 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui
channel->target_vp = hv_context.vp_index[cur_cpu];
}
+static void vmbus_wait_for_unload(void)
+{
+ int cpu = smp_processor_id();
+ void *page_addr = hv_context.synic_message_page[cpu];
+ struct hv_message *msg = (struct hv_message *)page_addr +
+ VMBUS_MESSAGE_SINT;
+ struct vmbus_channel_message_header *hdr;
+ bool unloaded = false;
+
+ while (1) {
+ if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) {
+ mdelay(10);
+ continue;
+ }
+
+ hdr = (struct vmbus_channel_message_header *)msg->u.payload;
+ if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
+ unloaded = true;
+
+ vmbus_signal_eom(msg);
+
+ if (unloaded)
+ break;
+ }
+}
+
/*
* vmbus_unload_response - Handler for the unload response.
*/
@@ -510,7 +633,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
complete(&vmbus_connection.unload_event);
}
-void vmbus_initiate_unload(void)
+void vmbus_initiate_unload(bool crash)
{
struct vmbus_channel_message_header hdr;
@@ -523,7 +646,14 @@ void vmbus_initiate_unload(void)
hdr.msgtype = CHANNELMSG_UNLOAD;
vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
- wait_for_completion(&vmbus_connection.unload_event);
+ /*
+ * vmbus_initiate_unload() is also called on crash and the crash can be
+ * happening in an interrupt context, where scheduling is impossible.
+ */
+ if (!crash)
+ wait_for_completion(&vmbus_connection.unload_event);
+ else
+ vmbus_wait_for_unload();
}
/*
@@ -592,6 +722,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct device *dev;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
channel = relid2channel(rescind->child_relid);
if (channel == NULL) {
@@ -600,7 +732,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* vmbus_process_offer(), we have already invoked
* vmbus_release_relid() on error.
*/
- return;
+ goto out;
}
spin_lock_irqsave(&channel->lock, flags);
@@ -608,6 +740,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
spin_unlock_irqrestore(&channel->lock, flags);
if (channel->device_obj) {
+ if (channel->chn_rescind_callback) {
+ channel->chn_rescind_callback(channel);
+ goto out;
+ }
/*
* We will have to unregister this device from the
* driver core.
@@ -621,7 +757,24 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
hv_process_channel_removal(channel,
channel->offermsg.child_relid);
}
+
+out:
+ mutex_unlock(&vmbus_connection.channel_mutex);
+}
+
+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
+{
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ BUG_ON(!is_hvsock_channel(channel));
+
+ channel->rescind = true;
+ vmbus_device_unregister(channel->device_obj);
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
+EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
+
/*
* vmbus_onoffers_delivered -
@@ -825,6 +978,10 @@ struct vmbus_channel_message_table_entry
{CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response},
{CHANNELMSG_UNLOAD, 0, NULL},
{CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response},
+ {CHANNELMSG_18, 0, NULL},
+ {CHANNELMSG_19, 0, NULL},
+ {CHANNELMSG_20, 0, NULL},
+ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL},
};
/*
@@ -973,3 +1130,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
+
+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
+ void (*chn_rescind_cb)(struct vmbus_channel *))
+{
+ channel->chn_rescind_callback = chn_rescind_cb;
+}
+EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 3dc5a9c7fad6..d02f1373dd98 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -88,8 +88,16 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
* This has been the behavior pre-win8. This is not
* perf issue and having all channel messages delivered on CPU 0
* would be ok.
+ * For post win8 hosts, we support receiving channel messagges on
+ * all the CPUs. This is needed for kexec to work correctly where
+ * the CPU attempting to connect may not be CPU 0.
*/
- msg->target_vcpu = 0;
+ if (version >= VERSION_WIN8_1) {
+ msg->target_vcpu = hv_context.vp_index[get_cpu()];
+ put_cpu();
+ } else {
+ msg->target_vcpu = 0;
+ }
/*
* Add to list before we send the request since we may
@@ -236,7 +244,7 @@ void vmbus_disconnect(void)
/*
* First send the unload request to the host.
*/
- vmbus_initiate_unload();
+ vmbus_initiate_unload(false);
if (vmbus_connection.work_queue) {
drain_workqueue(vmbus_connection.work_queue);
@@ -288,7 +296,8 @@ struct vmbus_channel *relid2channel(u32 relid)
struct list_head *cur, *tmp;
struct vmbus_channel *cur_sc;
- mutex_lock(&vmbus_connection.channel_mutex);
+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->offermsg.child_relid == relid) {
found_channel = channel;
@@ -307,7 +316,6 @@ struct vmbus_channel *relid2channel(u32 relid)
}
}
}
- mutex_unlock(&vmbus_connection.channel_mutex);
return found_channel;
}
@@ -474,7 +482,7 @@ int vmbus_post_msg(void *buffer, size_t buflen)
/*
* vmbus_set_event - Send an event notification to the parent
*/
-int vmbus_set_event(struct vmbus_channel *channel)
+void vmbus_set_event(struct vmbus_channel *channel)
{
u32 child_relid = channel->offermsg.child_relid;
@@ -485,5 +493,5 @@ int vmbus_set_event(struct vmbus_channel *channel)
(child_relid >> 5));
}
- return hv_signal_event(channel->sig_event);
+ hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL);
}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 11bca51ef5ff..a1c086ba3b9a 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -204,6 +204,8 @@ int hv_init(void)
sizeof(int) * NR_CPUS);
memset(hv_context.event_dpc, 0,
sizeof(void *) * NR_CPUS);
+ memset(hv_context.msg_dpc, 0,
+ sizeof(void *) * NR_CPUS);
memset(hv_context.clk_evt, 0,
sizeof(void *) * NR_CPUS);
@@ -295,8 +297,14 @@ void hv_cleanup(void)
* Cleanup the TSC page based CS.
*/
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
- clocksource_change_rating(&hyperv_cs_tsc, 10);
- clocksource_unregister(&hyperv_cs_tsc);
+ /*
+ * Crash can happen in an interrupt context and unregistering
+ * a clocksource is impossible and redundant in this case.
+ */
+ if (!oops_in_progress) {
+ clocksource_change_rating(&hyperv_cs_tsc, 10);
+ clocksource_unregister(&hyperv_cs_tsc);
+ }
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
@@ -337,22 +345,6 @@ int hv_post_message(union hv_connection_id connection_id,
return status & 0xFFFF;
}
-
-/*
- * hv_signal_event -
- * Signal an event on the specified connection using the hypervisor event IPC.
- *
- * This involves a hypercall.
- */
-int hv_signal_event(void *con_id)
-{
- u64 status;
-
- status = hv_do_hypercall(HVCALL_SIGNAL_EVENT, con_id, NULL);
-
- return status & 0xFFFF;
-}
-
static int hv_ce_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -425,6 +417,13 @@ int hv_synic_alloc(void)
}
tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
+ hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
+ if (hv_context.msg_dpc[cpu] == NULL) {
+ pr_err("Unable to allocate event dpc\n");
+ goto err;
+ }
+ tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);
+
hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
if (hv_context.clk_evt[cpu] == NULL) {
pr_err("Unable to allocate clock event device\n");
@@ -466,6 +465,7 @@ err:
static void hv_synic_free_cpu(int cpu)
{
kfree(hv_context.event_dpc[cpu]);
+ kfree(hv_context.msg_dpc[cpu]);
kfree(hv_context.clk_evt[cpu]);
if (hv_context.synic_event_page[cpu])
free_page((unsigned long)hv_context.synic_event_page[cpu]);
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index c37a71e13de0..23c70799ad8a 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -251,7 +251,6 @@ void hv_fcopy_onchannelcallback(void *context)
*/
fcopy_transaction.recv_len = recvlen;
- fcopy_transaction.recv_channel = channel;
fcopy_transaction.recv_req_id = requestid;
fcopy_transaction.fcopy_msg = fcopy_msg;
@@ -317,6 +316,7 @@ static void fcopy_on_reset(void)
int hv_fcopy_init(struct hv_util_service *srv)
{
recv_buffer = srv->recv_buffer;
+ fcopy_transaction.recv_channel = srv->channel;
/*
* When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index d4ab81bcd515..9b9b370fe22a 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -639,7 +639,6 @@ void hv_kvp_onchannelcallback(void *context)
*/
kvp_transaction.recv_len = recvlen;
- kvp_transaction.recv_channel = channel;
kvp_transaction.recv_req_id = requestid;
kvp_transaction.kvp_msg = kvp_msg;
@@ -688,6 +687,7 @@ int
hv_kvp_init(struct hv_util_service *srv)
{
recv_buffer = srv->recv_buffer;
+ kvp_transaction.recv_channel = srv->channel;
/*
* When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 67def4a831c8..3fba14e88f03 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -263,7 +263,6 @@ void hv_vss_onchannelcallback(void *context)
*/
vss_transaction.recv_len = recvlen;
- vss_transaction.recv_channel = channel;
vss_transaction.recv_req_id = requestid;
vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
@@ -337,6 +336,7 @@ hv_vss_init(struct hv_util_service *srv)
return -ENOTSUPP;
}
recv_buffer = srv->recv_buffer;
+ vss_transaction.recv_channel = srv->channel;
/*
* When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 7994ec2e4151..d5acaa2d8e61 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -322,6 +322,7 @@ static int util_probe(struct hv_device *dev,
srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
if (!srv->recv_buffer)
return -ENOMEM;
+ srv->channel = dev->channel;
if (srv->util_init) {
ret = srv->util_init(srv);
if (ret) {
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 4f42c0e20c20..9a9983fa4531 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -310,6 +310,9 @@ struct hvutil_transport *hvutil_transport_init(const char *name,
return hvt;
err_free_hvt:
+ spin_lock(&hvt_list_lock);
+ list_del(&hvt->list);
+ spin_unlock(&hvt_list_lock);
kfree(hvt);
return NULL;
}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 4ebc796b4f33..12321b93a756 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -256,12 +256,6 @@ struct hv_monitor_page {
u8 rsvdz4[1984];
};
-/* Declare the various hypercall operations. */
-enum hv_call_code {
- HVCALL_POST_MESSAGE = 0x005c,
- HVCALL_SIGNAL_EVENT = 0x005d,
-};
-
/* Definition of the hv_post_message hypercall input structure. */
struct hv_input_post_message {
union hv_connection_id connectionid;
@@ -449,10 +443,11 @@ struct hv_context {
u32 vp_index[NR_CPUS];
/*
* Starting with win8, we can take channel interrupts on any CPU;
- * we will manage the tasklet that handles events on a per CPU
+ * we will manage the tasklet that handles events messages on a per CPU
* basis.
*/
struct tasklet_struct *event_dpc[NR_CPUS];
+ struct tasklet_struct *msg_dpc[NR_CPUS];
/*
* To optimize the mapping of relid to channel, maintain
* per-cpu list of the channels based on their CPU affinity.
@@ -501,8 +496,6 @@ extern int hv_post_message(union hv_connection_id connection_id,
enum hv_message_type message_type,
void *payload, size_t payload_size);
-extern int hv_signal_event(void *con_id);
-
extern int hv_synic_alloc(void);
extern void hv_synic_free(void);
@@ -531,7 +524,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
struct kvec *kv_list,
- u32 kv_count, bool *signal);
+ u32 kv_count, bool *signal, bool lock);
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
void *buffer, u32 buflen, u32 *buffer_actual_len,
@@ -626,6 +619,30 @@ struct vmbus_channel_message_table_entry {
extern struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT];
+/* Free the message slot and signal end-of-message if required */
+static inline void vmbus_signal_eom(struct hv_message *msg)
+{
+ msg->header.message_type = HVMSG_NONE;
+
+ /*
+ * Make sure the write to MessageType (ie set to
+ * HVMSG_NONE) happens before we read the
+ * MessagePending and EOMing. Otherwise, the EOMing
+ * will not deliver any more messages since there is
+ * no empty slot
+ */
+ mb();
+
+ if (msg->header.message_flags.msg_pending) {
+ /*
+ * This will cause message queue rescan to
+ * possibly deliver another msg from the
+ * hypervisor
+ */
+ wrmsrl(HV_X64_MSR_EOM, 0);
+ }
+}
+
/* General vmbus interface */
struct hv_device *vmbus_device_create(const uuid_le *type,
@@ -650,9 +667,10 @@ void vmbus_disconnect(void);
int vmbus_post_msg(void *buffer, size_t buflen);
-int vmbus_set_event(struct vmbus_channel *channel);
+void vmbus_set_event(struct vmbus_channel *channel);
void vmbus_on_event(unsigned long data);
+void vmbus_on_msg_dpc(unsigned long data);
int hv_kvp_init(struct hv_util_service *);
void hv_kvp_deinit(void);
@@ -665,7 +683,7 @@ void hv_vss_onchannelcallback(void *);
int hv_fcopy_init(struct hv_util_service *);
void hv_fcopy_deinit(void);
void hv_fcopy_onchannelcallback(void *);
-void vmbus_initiate_unload(void);
+void vmbus_initiate_unload(bool crash);
static inline void hv_poll_channel(struct vmbus_channel *channel,
void (*cb)(void *))
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index b53702ce692f..5613e2b5cff7 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -314,7 +314,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
/* Write to the ring buffer. */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
- struct kvec *kv_list, u32 kv_count, bool *signal)
+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
{
int i = 0;
u32 bytes_avail_towrite;
@@ -324,14 +324,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
u32 next_write_location;
u32 old_write;
u64 prev_indices = 0;
- unsigned long flags;
+ unsigned long flags = 0;
for (i = 0; i < kv_count; i++)
totalbytes_towrite += kv_list[i].iov_len;
totalbytes_towrite += sizeof(u64);
- spin_lock_irqsave(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_lock_irqsave(&outring_info->ring_lock, flags);
hv_get_ringbuffer_availbytes(outring_info,
&bytes_avail_toread,
@@ -343,7 +344,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
* is empty since the read index == write index.
*/
if (bytes_avail_towrite <= totalbytes_towrite) {
- spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
return -EAGAIN;
}
@@ -374,7 +376,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
hv_set_next_write_location(outring_info, next_write_location);
- spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
*signal = hv_need_to_signal(old_write, outring_info);
return 0;
@@ -388,7 +391,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
u32 bytes_avail_toread;
u32 next_read_location = 0;
u64 prev_indices = 0;
- unsigned long flags;
struct vmpacket_descriptor desc;
u32 offset;
u32 packetlen;
@@ -397,7 +399,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
if (buflen <= 0)
return -EINVAL;
- spin_lock_irqsave(&inring_info->ring_lock, flags);
*buffer_actual_len = 0;
*requestid = 0;
@@ -412,7 +413,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
* No error is set when there is even no header, drivers are
* supposed to analyze buffer_actual_len.
*/
- goto out_unlock;
+ return ret;
}
next_read_location = hv_get_next_read_location(inring_info);
@@ -425,15 +426,11 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
*buffer_actual_len = packetlen;
*requestid = desc.trans_id;
- if (bytes_avail_toread < packetlen + offset) {
- ret = -EAGAIN;
- goto out_unlock;
- }
+ if (bytes_avail_toread < packetlen + offset)
+ return -EAGAIN;
- if (packetlen > buflen) {
- ret = -ENOBUFS;
- goto out_unlock;
- }
+ if (packetlen > buflen)
+ return -ENOBUFS;
next_read_location =
hv_get_next_readlocation_withoffset(inring_info, offset);
@@ -460,7 +457,5 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
*signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info);
-out_unlock:
- spin_unlock_irqrestore(&inring_info->ring_lock, flags);
return ret;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 328e4c3808e0..64713ff47e36 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -45,7 +45,6 @@
static struct acpi_device *hv_acpi_dev;
-static struct tasklet_struct msg_dpc;
static struct completion probe_event;
@@ -477,6 +476,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
}
static DEVICE_ATTR_RO(channel_vp_mapping);
+static ssize_t vendor_show(struct device *dev,
+ struct device_attribute *dev_attr,
+ char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t device_show(struct device *dev,
+ struct device_attribute *dev_attr,
+ char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ return sprintf(buf, "0x%x\n", hv_dev->device_id);
+}
+static DEVICE_ATTR_RO(device);
+
/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_attrs[] = {
&dev_attr_id.attr,
@@ -502,6 +519,8 @@ static struct attribute *vmbus_attrs[] = {
&dev_attr_in_read_bytes_avail.attr,
&dev_attr_in_write_bytes_avail.attr,
&dev_attr_channel_vp_mapping.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_device.attr,
NULL,
};
ATTRIBUTE_GROUPS(vmbus);
@@ -562,6 +581,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
struct hv_driver *drv = drv_to_hv_drv(driver);
struct hv_device *hv_dev = device_to_hv_device(device);
+ /* The hv_sock driver handles all hv_sock offers. */
+ if (is_hvsock_channel(hv_dev->channel))
+ return drv->hvsock;
+
if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
return 1;
@@ -685,28 +708,10 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
if (dev->event_handler)
dev->event_handler(dev);
- msg->header.message_type = HVMSG_NONE;
-
- /*
- * Make sure the write to MessageType (ie set to
- * HVMSG_NONE) happens before we read the
- * MessagePending and EOMing. Otherwise, the EOMing
- * will not deliver any more messages since there is
- * no empty slot
- */
- mb();
-
- if (msg->header.message_flags.msg_pending) {
- /*
- * This will cause message queue rescan to
- * possibly deliver another msg from the
- * hypervisor
- */
- wrmsrl(HV_X64_MSR_EOM, 0);
- }
+ vmbus_signal_eom(msg);
}
-static void vmbus_on_msg_dpc(unsigned long data)
+void vmbus_on_msg_dpc(unsigned long data)
{
int cpu = smp_processor_id();
void *page_addr = hv_context.synic_message_page[cpu];
@@ -716,52 +721,32 @@ static void vmbus_on_msg_dpc(unsigned long data)
struct vmbus_channel_message_table_entry *entry;
struct onmessage_work_context *ctx;
- while (1) {
- if (msg->header.message_type == HVMSG_NONE)
- /* no msg */
- break;
+ if (msg->header.message_type == HVMSG_NONE)
+ /* no msg */
+ return;
- hdr = (struct vmbus_channel_message_header *)msg->u.payload;
+ hdr = (struct vmbus_channel_message_header *)msg->u.payload;
- if (hdr->msgtype >= CHANNELMSG_COUNT) {
- WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
- goto msg_handled;
- }
+ if (hdr->msgtype >= CHANNELMSG_COUNT) {
+ WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
+ goto msg_handled;
+ }
- entry = &channel_message_table[hdr->msgtype];
- if (entry->handler_type == VMHT_BLOCKING) {
- ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
- if (ctx == NULL)
- continue;
+ entry = &channel_message_table[hdr->msgtype];
+ if (entry->handler_type == VMHT_BLOCKING) {
+ ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (ctx == NULL)
+ return;
- INIT_WORK(&ctx->work, vmbus_onmessage_work);
- memcpy(&ctx->msg, msg, sizeof(*msg));
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+ memcpy(&ctx->msg, msg, sizeof(*msg));
- queue_work(vmbus_connection.work_queue, &ctx->work);
- } else
- entry->message_handler(hdr);
+ queue_work(vmbus_connection.work_queue, &ctx->work);
+ } else
+ entry->message_handler(hdr);
msg_handled:
- msg->header.message_type = HVMSG_NONE;
-
- /*
- * Make sure the write to MessageType (ie set to
- * HVMSG_NONE) happens before we read the
- * MessagePending and EOMing. Otherwise, the EOMing
- * will not deliver any more messages since there is
- * no empty slot
- */
- mb();
-
- if (msg->header.message_flags.msg_pending) {
- /*
- * This will cause message queue rescan to
- * possibly deliver another msg from the
- * hypervisor
- */
- wrmsrl(HV_X64_MSR_EOM, 0);
- }
- }
+ vmbus_signal_eom(msg);
}
static void vmbus_isr(void)
@@ -814,7 +799,7 @@ static void vmbus_isr(void)
if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
hv_process_timer_expiration(msg, cpu);
else
- tasklet_schedule(&msg_dpc);
+ tasklet_schedule(hv_context.msg_dpc[cpu]);
}
}
@@ -838,8 +823,6 @@ static int vmbus_bus_init(void)
return ret;
}
- tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);
-
ret = bus_register(&hv_bus);
if (ret)
goto err_cleanup;
@@ -957,6 +940,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type,
memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
memcpy(&child_device_obj->dev_instance, instance,
sizeof(uuid_le));
+ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
return child_device_obj;
@@ -1268,7 +1252,7 @@ static void hv_kexec_handler(void)
int cpu;
hv_synic_clockevents_cleanup();
- vmbus_initiate_unload();
+ vmbus_initiate_unload(false);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
hv_cleanup();
@@ -1276,7 +1260,7 @@ static void hv_kexec_handler(void)
static void hv_crash_handler(struct pt_regs *regs)
{
- vmbus_initiate_unload();
+ vmbus_initiate_unload(true);
/*
* In crash handler we can't schedule synic cleanup for all CPUs,
* doing the cleanup for current CPU only. This should be sufficient
@@ -1334,7 +1318,8 @@ static void __exit vmbus_exit(void)
hv_synic_clockevents_cleanup();
vmbus_disconnect();
hv_remove_vmbus_irq();
- tasklet_kill(&msg_dpc);
+ for_each_online_cpu(cpu)
+ tasklet_kill(hv_context.msg_dpc[cpu]);
vmbus_free_channels();
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
unregister_die_notifier(&hyperv_die_block);