diff options
author | Greg Kroah-Hartman <gregkh@suse.de> | 2011-10-04 23:29:52 +0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-10-11 08:52:55 +0400 |
commit | 46a971913611a23478283931460a95be962ce329 (patch) | |
tree | 7452d0f07ee9f1f5270a8da6c1387f35c439843d /drivers/hv | |
parent | 715a4801e734ea9c8e528265ce3ff6aead85bce1 (diff) | |
download | linux-46a971913611a23478283931460a95be962ce329.tar.xz |
Staging: hv: move hyperv code out of staging directory
After many years wandering the desert, it is finally time for the
Microsoft HyperV code to move out of the staging directory. Or at least
the core hyperv bus code and the utility driver; the rest still have
some review to get through by the various subsystem maintainers.
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/Kconfig | 14 | ||||
-rw-r--r-- | drivers/hv/Makefile | 7 | ||||
-rw-r--r-- | drivers/hv/channel.c | 815 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 647 | ||||
-rw-r--r-- | drivers/hv/connection.c | 318 | ||||
-rw-r--r-- | drivers/hv/hv.c | 429 | ||||
-rw-r--r-- | drivers/hv/hv_kvp.c | 339 | ||||
-rw-r--r-- | drivers/hv/hv_kvp.h | 184 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 354 | ||||
-rw-r--r-- | drivers/hv/hyperv_vmbus.h | 628 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 527 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 772 |
12 files changed, 5034 insertions, 0 deletions
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig new file mode 100644 index 000000000000..9fa09ac000ad --- /dev/null +++ b/drivers/hv/Kconfig @@ -0,0 +1,14 @@ +config HYPERV + tristate "Microsoft Hyper-V client drivers" + depends on X86 && ACPI && PCI + help + Select this option to run Linux as a Hyper-V client operating + system. + +config HYPERV_UTILS + tristate "Microsoft Hyper-V Utilities driver" + depends on HYPERV && CONNECTOR && NLS + help + Select this option to enable the Hyper-V Utilities. + + diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile new file mode 100644 index 000000000000..a23938b991c9 --- /dev/null +++ b/drivers/hv/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_HYPERV) += hv_vmbus.o +obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o + +hv_vmbus-y := vmbus_drv.o \ + hv.o connection.o channel.o \ + channel_mgmt.o ring_buffer.o +hv_utils-y := hv_util.o hv_kvp.o diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c new file mode 100644 index 000000000000..406537420fff --- /dev/null +++ b/drivers/hv/channel.c @@ -0,0 +1,815 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + +#define NUM_PAGES_SPANNED(addr, len) \ +((PAGE_ALIGN(addr + len) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT)) + +/* Internal routines */ +static int create_gpadl_header( + void *kbuffer, /* must be phys and virt contiguous */ + u32 size, /* page-size multiple */ + struct vmbus_channel_msginfo **msginfo, + u32 *messagecount); +static void vmbus_setevent(struct vmbus_channel *channel); + +/* + * vmbus_setevent- Trigger an event notification on the specified + * channel. + */ +static void vmbus_setevent(struct vmbus_channel *channel) +{ + struct hv_monitor_page *monitorpage; + + if (channel->offermsg.monitor_allocated) { + /* Each u32 represents 32 channels */ + sync_set_bit(channel->offermsg.child_relid & 31, + (unsigned long *) vmbus_connection.send_int_page + + (channel->offermsg.child_relid >> 5)); + + monitorpage = vmbus_connection.monitor_pages; + monitorpage++; /* Get the child to parent monitor page */ + + sync_set_bit(channel->monitor_bit, + (unsigned long *)&monitorpage->trigger_group + [channel->monitor_grp].pending); + + } else { + vmbus_set_event(channel->offermsg.child_relid); + } +} + +/* + * vmbus_get_debug_info -Retrieve various channel debug info + */ +void vmbus_get_debug_info(struct vmbus_channel *channel, + struct vmbus_channel_debug_info *debuginfo) +{ + struct hv_monitor_page *monitorpage; + u8 monitor_group = (u8)channel->offermsg.monitorid / 32; + u8 monitor_offset = (u8)channel->offermsg.monitorid % 32; + + debuginfo->relid = channel->offermsg.child_relid; + debuginfo->state = channel->state; + memcpy(&debuginfo->interfacetype, + &channel->offermsg.offer.if_type, sizeof(uuid_le)); + 
memcpy(&debuginfo->interface_instance, + &channel->offermsg.offer.if_instance, + sizeof(uuid_le)); + + monitorpage = (struct hv_monitor_page *)vmbus_connection.monitor_pages; + + debuginfo->monitorid = channel->offermsg.monitorid; + + debuginfo->servermonitor_pending = + monitorpage->trigger_group[monitor_group].pending; + debuginfo->servermonitor_latency = + monitorpage->latency[monitor_group][monitor_offset]; + debuginfo->servermonitor_connectionid = + monitorpage->parameter[monitor_group] + [monitor_offset].connectionid.u.id; + + monitorpage++; + + debuginfo->clientmonitor_pending = + monitorpage->trigger_group[monitor_group].pending; + debuginfo->clientmonitor_latency = + monitorpage->latency[monitor_group][monitor_offset]; + debuginfo->clientmonitor_connectionid = + monitorpage->parameter[monitor_group] + [monitor_offset].connectionid.u.id; + + hv_ringbuffer_get_debuginfo(&channel->inbound, &debuginfo->inbound); + hv_ringbuffer_get_debuginfo(&channel->outbound, &debuginfo->outbound); +} + +/* + * vmbus_open - Open the specified channel. 
+ */ +int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + u32 recv_ringbuffer_size, void *userdata, u32 userdatalen, + void (*onchannelcallback)(void *context), void *context) +{ + struct vmbus_channel_open_channel *open_msg; + struct vmbus_channel_msginfo *open_info = NULL; + void *in, *out; + unsigned long flags; + int ret, t, err = 0; + + newchannel->onchannel_callback = onchannelcallback; + newchannel->channel_callback_context = context; + + /* Allocate the ring buffer */ + out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, + get_order(send_ringbuffer_size + recv_ringbuffer_size)); + + if (!out) + return -ENOMEM; + + + in = (void *)((unsigned long)out + send_ringbuffer_size); + + newchannel->ringbuffer_pages = out; + newchannel->ringbuffer_pagecount = (send_ringbuffer_size + + recv_ringbuffer_size) >> PAGE_SHIFT; + + ret = hv_ringbuffer_init( + &newchannel->outbound, out, send_ringbuffer_size); + + if (ret != 0) { + err = ret; + goto errorout; + } + + ret = hv_ringbuffer_init( + &newchannel->inbound, in, recv_ringbuffer_size); + if (ret != 0) { + err = ret; + goto errorout; + } + + + /* Establish the gpadl for the ring buffer */ + newchannel->ringbuffer_gpadlhandle = 0; + + ret = vmbus_establish_gpadl(newchannel, + newchannel->outbound.ring_buffer, + send_ringbuffer_size + + recv_ringbuffer_size, + &newchannel->ringbuffer_gpadlhandle); + + if (ret != 0) { + err = ret; + goto errorout; + } + + /* Create and init the channel open message */ + open_info = kmalloc(sizeof(*open_info) + + sizeof(struct vmbus_channel_open_channel), + GFP_KERNEL); + if (!open_info) { + err = -ENOMEM; + goto errorout; + } + + init_completion(&open_info->waitevent); + + open_msg = (struct vmbus_channel_open_channel *)open_info->msg; + open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL; + open_msg->openid = newchannel->offermsg.child_relid; + open_msg->child_relid = newchannel->offermsg.child_relid; + open_msg->ringbuffer_gpadlhandle = 
newchannel->ringbuffer_gpadlhandle; + open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >> + PAGE_SHIFT; + open_msg->server_contextarea_gpadlhandle = 0; + + if (userdatalen > MAX_USER_DEFINED_BYTES) { + err = -EINVAL; + goto errorout; + } + + if (userdatalen) + memcpy(open_msg->userdata, userdata, userdatalen); + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_add_tail(&open_info->msglistentry, + &vmbus_connection.chn_msg_list); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + ret = vmbus_post_msg(open_msg, + sizeof(struct vmbus_channel_open_channel)); + + if (ret != 0) + goto cleanup; + + t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ); + if (t == 0) { + err = -ETIMEDOUT; + goto errorout; + } + + + if (open_info->response.open_result.status) + err = open_info->response.open_result.status; + +cleanup: + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&open_info->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + kfree(open_info); + return err; + +errorout: + hv_ringbuffer_cleanup(&newchannel->outbound); + hv_ringbuffer_cleanup(&newchannel->inbound); + free_pages((unsigned long)out, + get_order(send_ringbuffer_size + recv_ringbuffer_size)); + kfree(open_info); + return err; +} +EXPORT_SYMBOL_GPL(vmbus_open); + +/* + * create_gpadl_header - Creates a gpadl for the specified buffer + */ +static int create_gpadl_header(void *kbuffer, u32 size, + struct vmbus_channel_msginfo **msginfo, + u32 *messagecount) +{ + int i; + int pagecount; + unsigned long long pfn; + struct vmbus_channel_gpadl_header *gpadl_header; + struct vmbus_channel_gpadl_body *gpadl_body; + struct vmbus_channel_msginfo *msgheader; + struct vmbus_channel_msginfo *msgbody = NULL; + u32 msgsize; + + int pfnsum, pfncount, pfnleft, pfncurr, pfnsize; + + pagecount = size >> PAGE_SHIFT; + pfn = virt_to_phys(kbuffer) >> PAGE_SHIFT; + + /* do we need a gpadl body msg */ + pfnsize 
= MAX_SIZE_CHANNEL_MESSAGE - + sizeof(struct vmbus_channel_gpadl_header) - + sizeof(struct gpa_range); + pfncount = pfnsize / sizeof(u64); + + if (pagecount > pfncount) { + /* we need a gpadl body */ + /* fill in the header */ + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_header) + + sizeof(struct gpa_range) + pfncount * sizeof(u64); + msgheader = kzalloc(msgsize, GFP_KERNEL); + if (!msgheader) + goto nomem; + + INIT_LIST_HEAD(&msgheader->submsglist); + msgheader->msgsize = msgsize; + + gpadl_header = (struct vmbus_channel_gpadl_header *) + msgheader->msg; + gpadl_header->rangecount = 1; + gpadl_header->range_buflen = sizeof(struct gpa_range) + + pagecount * sizeof(u64); + gpadl_header->range[0].byte_offset = 0; + gpadl_header->range[0].byte_count = size; + for (i = 0; i < pfncount; i++) + gpadl_header->range[0].pfn_array[i] = pfn+i; + *msginfo = msgheader; + *messagecount = 1; + + pfnsum = pfncount; + pfnleft = pagecount - pfncount; + + /* how many pfns can we fit */ + pfnsize = MAX_SIZE_CHANNEL_MESSAGE - + sizeof(struct vmbus_channel_gpadl_body); + pfncount = pfnsize / sizeof(u64); + + /* fill in the body */ + while (pfnleft) { + if (pfnleft > pfncount) + pfncurr = pfncount; + else + pfncurr = pfnleft; + + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_body) + + pfncurr * sizeof(u64); + msgbody = kzalloc(msgsize, GFP_KERNEL); + + if (!msgbody) { + struct vmbus_channel_msginfo *pos = NULL; + struct vmbus_channel_msginfo *tmp = NULL; + /* + * Free up all the allocated messages. 
+ */ + list_for_each_entry_safe(pos, tmp, + &msgheader->submsglist, + msglistentry) { + + list_del(&pos->msglistentry); + kfree(pos); + } + + goto nomem; + } + + msgbody->msgsize = msgsize; + (*messagecount)++; + gpadl_body = + (struct vmbus_channel_gpadl_body *)msgbody->msg; + + /* + * Gpadl is u32 and we are using a pointer which could + * be 64-bit + * This is governed by the guest/host protocol and + * so the hypervisor guarantees that this is ok. + */ + for (i = 0; i < pfncurr; i++) + gpadl_body->pfn[i] = pfn + pfnsum + i; + + /* add to msg header */ + list_add_tail(&msgbody->msglistentry, + &msgheader->submsglist); + pfnsum += pfncurr; + pfnleft -= pfncurr; + } + } else { + /* everything fits in a header */ + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_header) + + sizeof(struct gpa_range) + pagecount * sizeof(u64); + msgheader = kzalloc(msgsize, GFP_KERNEL); + if (msgheader == NULL) + goto nomem; + msgheader->msgsize = msgsize; + + gpadl_header = (struct vmbus_channel_gpadl_header *) + msgheader->msg; + gpadl_header->rangecount = 1; + gpadl_header->range_buflen = sizeof(struct gpa_range) + + pagecount * sizeof(u64); + gpadl_header->range[0].byte_offset = 0; + gpadl_header->range[0].byte_count = size; + for (i = 0; i < pagecount; i++) + gpadl_header->range[0].pfn_array[i] = pfn+i; + + *msginfo = msgheader; + *messagecount = 1; + } + + return 0; +nomem: + kfree(msgheader); + kfree(msgbody); + return -ENOMEM; +} + +/* + * vmbus_establish_gpadl - Establish a GPADL for the specified buffer + * + * @channel: a channel + * @kbuffer: from kmalloc + * @size: page-size multiple + * @gpadl_handle: some funky thing + */ +int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, + u32 size, u32 *gpadl_handle) +{ + struct vmbus_channel_gpadl_header *gpadlmsg; + struct vmbus_channel_gpadl_body *gpadl_body; + struct vmbus_channel_msginfo *msginfo = NULL; + struct vmbus_channel_msginfo *submsginfo; + u32 msgcount; + struct 
list_head *curr; + u32 next_gpadl_handle; + unsigned long flags; + int ret = 0; + int t; + + next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle); + atomic_inc(&vmbus_connection.next_gpadl_handle); + + ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount); + if (ret) + return ret; + + init_completion(&msginfo->waitevent); + + gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg; + gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER; + gpadlmsg->child_relid = channel->offermsg.child_relid; + gpadlmsg->gpadl = next_gpadl_handle; + + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_add_tail(&msginfo->msglistentry, + &vmbus_connection.chn_msg_list); + + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize - + sizeof(*msginfo)); + if (ret != 0) + goto cleanup; + + if (msgcount > 1) { + list_for_each(curr, &msginfo->submsglist) { + + submsginfo = (struct vmbus_channel_msginfo *)curr; + gpadl_body = + (struct vmbus_channel_gpadl_body *)submsginfo->msg; + + gpadl_body->header.msgtype = + CHANNELMSG_GPADL_BODY; + gpadl_body->gpadl = next_gpadl_handle; + + ret = vmbus_post_msg(gpadl_body, + submsginfo->msgsize - + sizeof(*submsginfo)); + if (ret != 0) + goto cleanup; + + } + } + t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); + BUG_ON(t == 0); + + + /* At this point, we received the gpadl created msg */ + *gpadl_handle = gpadlmsg->gpadl; + +cleanup: + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&msginfo->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + kfree(msginfo); + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_establish_gpadl); + +/* + * vmbus_teardown_gpadl -Teardown the specified GPADL handle + */ +int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) +{ + struct vmbus_channel_gpadl_teardown *msg; + struct vmbus_channel_msginfo *info; + unsigned long flags; + int 
ret, t; + + info = kmalloc(sizeof(*info) + + sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL); + if (!info) + return -ENOMEM; + + init_completion(&info->waitevent); + + msg = (struct vmbus_channel_gpadl_teardown *)info->msg; + + msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN; + msg->child_relid = channel->offermsg.child_relid; + msg->gpadl = gpadl_handle; + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_add_tail(&info->msglistentry, + &vmbus_connection.chn_msg_list); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + ret = vmbus_post_msg(msg, + sizeof(struct vmbus_channel_gpadl_teardown)); + + BUG_ON(ret != 0); + t = wait_for_completion_timeout(&info->waitevent, 5*HZ); + BUG_ON(t == 0); + + /* Received a torndown response */ + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&info->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + kfree(info); + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl); + +/* + * vmbus_close - Close the specified channel + */ +void vmbus_close(struct vmbus_channel *channel) +{ + struct vmbus_channel_close_channel *msg; + int ret; + unsigned long flags; + + /* Stop callback and cancel the timer asap */ + spin_lock_irqsave(&channel->inbound_lock, flags); + channel->onchannel_callback = NULL; + spin_unlock_irqrestore(&channel->inbound_lock, flags); + + /* Send a closing message */ + + msg = &channel->close_msg.msg; + + msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; + msg->child_relid = channel->offermsg.child_relid; + + ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel)); + + BUG_ON(ret != 0); + /* Tear down the gpadl for the channel's ring buffer */ + if (channel->ringbuffer_gpadlhandle) + vmbus_teardown_gpadl(channel, + channel->ringbuffer_gpadlhandle); + + /* Cleanup the ring buffers for this channel */ + hv_ringbuffer_cleanup(&channel->outbound); + hv_ringbuffer_cleanup(&channel->inbound); + + 
+ free_pages((unsigned long)channel->ringbuffer_pages, + get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + + +} +EXPORT_SYMBOL_GPL(vmbus_close); + +/** + * vmbus_sendpacket() - Send the specified buffer on the given channel + * @channel: Pointer to vmbus_channel structure. + * @buffer: Pointer to the buffer containing the data to send. + * @bufferlen: Length in bytes of the data in @buffer + * @requestid: Identifier of the request + * @type: Type of packet that is being sent e.g. negotiate, time + * packet etc. + * + * Sends data in @buffer directly to hyper-v via the vmbus + * This will send the data unparsed to hyper-v. + * + * Mainly used by Hyper-V drivers. + */ +int vmbus_sendpacket(struct vmbus_channel *channel, const void *buffer, + u32 bufferlen, u64 requestid, + enum vmbus_packet_type type, u32 flags) +{ + struct vmpacket_descriptor desc; + u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen; + u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64)); + struct scatterlist bufferlist[3]; + u64 aligned_data = 0; + int ret; + + + /* Setup the descriptor */ + desc.type = type; /* VmbusPacketTypeDataInBand; */ + desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */ + /* in 8-bytes granularity */ + desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3; + desc.len8 = (u16)(packetlen_aligned >> 3); + desc.trans_id = requestid; + + sg_init_table(bufferlist, 3); + sg_set_buf(&bufferlist[0], &desc, sizeof(struct vmpacket_descriptor)); + sg_set_buf(&bufferlist[1], buffer, bufferlen); + sg_set_buf(&bufferlist[2], &aligned_data, + packetlen_aligned - packetlen); + + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3); + + if (ret == 0 && !hv_get_ringbuffer_interrupt_mask(&channel->outbound)) + vmbus_setevent(channel); + + return ret; +} +EXPORT_SYMBOL(vmbus_sendpacket); + +/* + * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer + * packets using a GPADL Direct packet type. 
+ */ +int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, + struct hv_page_buffer pagebuffers[], + u32 pagecount, void *buffer, u32 bufferlen, + u64 requestid) +{ + int ret; + int i; + struct vmbus_channel_packet_page_buffer desc; + u32 descsize; + u32 packetlen; + u32 packetlen_aligned; + struct scatterlist bufferlist[3]; + u64 aligned_data = 0; + + if (pagecount > MAX_PAGE_BUFFER_COUNT) + return -EINVAL; + + + /* + * Adjust the size down since vmbus_channel_packet_page_buffer is the + * largest size we support + */ + descsize = sizeof(struct vmbus_channel_packet_page_buffer) - + ((MAX_PAGE_BUFFER_COUNT - pagecount) * + sizeof(struct hv_page_buffer)); + packetlen = descsize + bufferlen; + packetlen_aligned = ALIGN(packetlen, sizeof(u64)); + + /* Setup the descriptor */ + desc.type = VM_PKT_DATA_USING_GPA_DIRECT; + desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ + desc.length8 = (u16)(packetlen_aligned >> 3); + desc.transactionid = requestid; + desc.rangecount = pagecount; + + for (i = 0; i < pagecount; i++) { + desc.range[i].len = pagebuffers[i].len; + desc.range[i].offset = pagebuffers[i].offset; + desc.range[i].pfn = pagebuffers[i].pfn; + } + + sg_init_table(bufferlist, 3); + sg_set_buf(&bufferlist[0], &desc, descsize); + sg_set_buf(&bufferlist[1], buffer, bufferlen); + sg_set_buf(&bufferlist[2], &aligned_data, + packetlen_aligned - packetlen); + + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3); + + if (ret == 0 && !hv_get_ringbuffer_interrupt_mask(&channel->outbound)) + vmbus_setevent(channel); + + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); + +/* + * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * using a GPADL Direct packet type. 
+ */ +int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, + struct hv_multipage_buffer *multi_pagebuffer, + void *buffer, u32 bufferlen, u64 requestid) +{ + int ret; + struct vmbus_channel_packet_multipage_buffer desc; + u32 descsize; + u32 packetlen; + u32 packetlen_aligned; + struct scatterlist bufferlist[3]; + u64 aligned_data = 0; + u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, + multi_pagebuffer->len); + + + if ((pfncount < 0) || (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)) + return -EINVAL; + + /* + * Adjust the size down since vmbus_channel_packet_multipage_buffer is + * the largest size we support + */ + descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) - + ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) * + sizeof(u64)); + packetlen = descsize + bufferlen; + packetlen_aligned = ALIGN(packetlen, sizeof(u64)); + + + /* Setup the descriptor */ + desc.type = VM_PKT_DATA_USING_GPA_DIRECT; + desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ + desc.length8 = (u16)(packetlen_aligned >> 3); + desc.transactionid = requestid; + desc.rangecount = 1; + + desc.range.len = multi_pagebuffer->len; + desc.range.offset = multi_pagebuffer->offset; + + memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array, + pfncount * sizeof(u64)); + + sg_init_table(bufferlist, 3); + sg_set_buf(&bufferlist[0], &desc, descsize); + sg_set_buf(&bufferlist[1], buffer, bufferlen); + sg_set_buf(&bufferlist[2], &aligned_data, + packetlen_aligned - packetlen); + + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3); + + if (ret == 0 && !hv_get_ringbuffer_interrupt_mask(&channel->outbound)) + vmbus_setevent(channel); + + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); + +/** + * vmbus_recvpacket() - Retrieve the user packet on the specified channel + * @channel: Pointer to vmbus_channel structure. 
+ * @buffer: Pointer to the buffer you want to receive the data into. + * @bufferlen: Maximum size of what the buffer will hold + * @buffer_actual_len: The actual size of the data after it was received + * @requestid: Identifier of the request + * + * Receives directly from the hyper-v vmbus and puts the data it received + * into Buffer. This will receive the data unparsed from hyper-v. + * + * Mainly used by Hyper-V drivers. + */ +int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, + u32 bufferlen, u32 *buffer_actual_len, u64 *requestid) +{ + struct vmpacket_descriptor desc; + u32 packetlen; + u32 userlen; + int ret; + + *buffer_actual_len = 0; + *requestid = 0; + + + ret = hv_ringbuffer_peek(&channel->inbound, &desc, + sizeof(struct vmpacket_descriptor)); + if (ret != 0) + return 0; + + packetlen = desc.len8 << 3; + userlen = packetlen - (desc.offset8 << 3); + + *buffer_actual_len = userlen; + + if (userlen > bufferlen) { + + pr_err("Buffer too small - got %d needs %d\n", + bufferlen, userlen); + return -ETOOSMALL; + } + + *requestid = desc.trans_id; + + /* Copy over the packet to the user buffer */ + ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen, + (desc.offset8 << 3)); + + + return 0; +} +EXPORT_SYMBOL(vmbus_recvpacket); + +/* + * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel + */ +int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, + u32 bufferlen, u32 *buffer_actual_len, + u64 *requestid) +{ + struct vmpacket_descriptor desc; + u32 packetlen; + u32 userlen; + int ret; + + *buffer_actual_len = 0; + *requestid = 0; + + + ret = hv_ringbuffer_peek(&channel->inbound, &desc, + sizeof(struct vmpacket_descriptor)); + if (ret != 0) + return 0; + + + packetlen = desc.len8 << 3; + userlen = packetlen - (desc.offset8 << 3); + + *buffer_actual_len = packetlen; + + if (packetlen > bufferlen) { + pr_err("Buffer too small - needed %d bytes but " + "got space for only %d bytes\n", + packetlen, 
bufferlen); + return -ENOBUFS; + } + + *requestid = desc.trans_id; + + /* Copy over the entire packet to the user buffer */ + ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0); + + return 0; +} +EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c new file mode 100644 index 000000000000..41bf287baa1c --- /dev/null +++ b/drivers/hv/channel_mgmt.c @@ -0,0 +1,647 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/completion.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + +struct vmbus_channel_message_table_entry { + enum vmbus_channel_message_type message_type; + void (*message_handler)(struct vmbus_channel_message_header *msg); +}; + +#define MAX_MSG_TYPES 4 +#define MAX_NUM_DEVICE_CLASSES_SUPPORTED 8 + +static const uuid_le + supported_device_classes[MAX_NUM_DEVICE_CLASSES_SUPPORTED] = { + /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ + /* Storage - SCSI */ + { + .b = { + 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, + 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f + } + }, + + /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ + /* Network */ + { + .b = { + 0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, + 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E + } + }, + + /* {CFA8B69E-5B4A-4cc0-B98B-8BA1A1F3F95A} */ + /* Input */ + { + .b = { + 0x9E, 0xB6, 0xA8, 0xCF, 0x4A, 0x5B, 0xc0, 0x4c, + 0xB9, 0x8B, 0x8B, 0xA1, 0xA1, 0xF3, 0xF9, 0x5A + } + }, + + /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ + /* IDE */ + { + .b = { + 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, + 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 + } + }, + /* 0E0B6031-5213-4934-818B-38D90CED39DB */ + /* Shutdown */ + { + .b = { + 0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, + 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB + } + }, + /* {9527E630-D0AE-497b-ADCE-E80AB0175CAF} */ + /* TimeSync */ + { + .b = { + 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, + 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf + } + }, + /* {57164f39-9115-4e78-ab55-382f3bd5422d} */ + /* Heartbeat */ + { + .b = { + 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, + 0xab, 0x55, 
0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d + } + }, + /* {A9A0F4E7-5A45-4d96-B827-8A841E8C03E6} */ + /* KVP */ + { + .b = { + 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, + 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 + } + }, + +}; + + +/** + * prep_negotiate_resp() - Create default response for Hyper-V Negotiate message + * @icmsghdrp: Pointer to msg header structure + * @icmsg_negotiate: Pointer to negotiate message structure + * @buf: Raw buffer channel data + * + * @icmsghdrp is of type &struct icmsg_hdr. + * @negop is of type &struct icmsg_negotiate. + * Set up and fill in default negotiate response message. This response can + * come from both the vmbus driver and the hv_utils driver. The current api + * will respond properly to both Windows 2008 and Windows 2008-R2 operating + * systems. + * + * Mainly used by Hyper-V drivers. + */ +void prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + struct icmsg_negotiate *negop, + u8 *buf) +{ + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + icmsghdrp->icmsgsize = 0x10; + + negop = (struct icmsg_negotiate *)&buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + if (negop->icframe_vercnt == 2 && + negop->icversion_data[1].major == 3) { + negop->icversion_data[0].major = 3; + negop->icversion_data[0].minor = 0; + negop->icversion_data[1].major = 3; + negop->icversion_data[1].minor = 0; + } else { + negop->icversion_data[0].major = 1; + negop->icversion_data[0].minor = 0; + negop->icversion_data[1].major = 1; + negop->icversion_data[1].minor = 0; + } + + negop->icframe_vercnt = 1; + negop->icmsg_vercnt = 1; + } +} +EXPORT_SYMBOL(prep_negotiate_resp); + +/* + * alloc_channel - Allocate and initialize a vmbus channel object + */ +static struct vmbus_channel *alloc_channel(void) +{ + struct vmbus_channel *channel; + + channel = kzalloc(sizeof(*channel), GFP_ATOMIC); + if (!channel) + return NULL; + + spin_lock_init(&channel->inbound_lock); + + channel->controlwq = create_workqueue("hv_vmbus_ctl"); + if 
(!channel->controlwq) { + kfree(channel); + return NULL; + } + + return channel; +} + +/* + * release_channel - Release the vmbus channel object itself + */ +static void release_channel(struct work_struct *work) +{ + struct vmbus_channel *channel = container_of(work, + struct vmbus_channel, + work); + + destroy_workqueue(channel->controlwq); + + kfree(channel); +} + +/* + * free_channel - Release the resources used by the vmbus channel object + */ +void free_channel(struct vmbus_channel *channel) +{ + + /* + * We have to release the channel's workqueue/thread in the vmbus's + * workqueue/thread context + * ie we can't destroy ourselves. + */ + INIT_WORK(&channel->work, release_channel); + queue_work(vmbus_connection.work_queue, &channel->work); +} + + + +/* + * vmbus_process_rescind_offer - + * Rescind the offer by initiating a device removal + */ +static void vmbus_process_rescind_offer(struct work_struct *work) +{ + struct vmbus_channel *channel = container_of(work, + struct vmbus_channel, + work); + + vmbus_device_unregister(channel->device_obj); +} + +/* + * vmbus_process_offer - Process the offer by creating a channel/device + * associated with this offer + */ +static void vmbus_process_offer(struct work_struct *work) +{ + struct vmbus_channel *newchannel = container_of(work, + struct vmbus_channel, + work); + struct vmbus_channel *channel; + bool fnew = true; + int ret; + unsigned long flags; + + /* The next possible work is rescind handling */ + INIT_WORK(&newchannel->work, vmbus_process_rescind_offer); + + /* Make sure this is a new offer */ + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (!uuid_le_cmp(channel->offermsg.offer.if_type, + newchannel->offermsg.offer.if_type) && + !uuid_le_cmp(channel->offermsg.offer.if_instance, + newchannel->offermsg.offer.if_instance)) { + fnew = false; + break; + } + } + + if (fnew) + list_add_tail(&newchannel->listentry, + 
&vmbus_connection.chn_list); + + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + if (!fnew) { + free_channel(newchannel); + return; + } + + /* + * Start the process of binding this offer to the driver + * We need to set the DeviceObject field before calling + * vmbus_child_dev_add() + */ + newchannel->device_obj = vmbus_device_create( + &newchannel->offermsg.offer.if_type, + &newchannel->offermsg.offer.if_instance, + newchannel); + + /* + * Add the new device to the bus. This will kick off device-driver + * binding which eventually invokes the device driver's AddDevice() + * method. + */ + ret = vmbus_device_register(newchannel->device_obj); + if (ret != 0) { + pr_err("unable to add child device object (relid %d)\n", + newchannel->offermsg.child_relid); + + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + list_del(&newchannel->listentry); + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + free_channel(newchannel); + } else { + /* + * This state is used to indicate a successful open + * so that when we do close the channel normally, we + * can cleanup properly + */ + newchannel->state = CHANNEL_OPEN_STATE; + } +} + +/* + * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. + * + */ +static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_offer_channel *offer; + struct vmbus_channel *newchannel; + uuid_le *guidtype; + uuid_le *guidinstance; + int i; + int fsupported = 0; + + offer = (struct vmbus_channel_offer_channel *)hdr; + for (i = 0; i < MAX_NUM_DEVICE_CLASSES_SUPPORTED; i++) { + if (!uuid_le_cmp(offer->offer.if_type, + supported_device_classes[i])) { + fsupported = 1; + break; + } + } + + if (!fsupported) + return; + + guidtype = &offer->offer.if_type; + guidinstance = &offer->offer.if_instance; + + /* Allocate the channel object and save this offer. 
*/ + newchannel = alloc_channel(); + if (!newchannel) { + pr_err("Unable to allocate channel object\n"); + return; + } + + memcpy(&newchannel->offermsg, offer, + sizeof(struct vmbus_channel_offer_channel)); + newchannel->monitor_grp = (u8)offer->monitorid / 32; + newchannel->monitor_bit = (u8)offer->monitorid % 32; + + INIT_WORK(&newchannel->work, vmbus_process_offer); + queue_work(newchannel->controlwq, &newchannel->work); +} + +/* + * vmbus_onoffer_rescind - Rescind offer handler. + * + * We queue a work item to process this offer synchronously + */ +static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_rescind_offer *rescind; + struct vmbus_channel *channel; + + rescind = (struct vmbus_channel_rescind_offer *)hdr; + channel = relid2channel(rescind->child_relid); + + if (channel == NULL) + /* Just return here, no channel found */ + return; + + /* work is initialized for vmbus_process_rescind_offer() from + * vmbus_process_offer() where the channel got created */ + queue_work(channel->controlwq, &channel->work); +} + +/* + * vmbus_onoffers_delivered - + * This is invoked when all offers have been delivered. + * + * Nothing to do here. + */ +static void vmbus_onoffers_delivered( + struct vmbus_channel_message_header *hdr) +{ +} + +/* + * vmbus_onopen_result - Open result handler. + * + * This is invoked when we received a response to our channel open request. + * Find the matching request, copy the response and signal the requesting + * thread. 
+ */ +static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_open_result *result; + struct vmbus_channel_msginfo *msginfo; + struct vmbus_channel_message_header *requestheader; + struct vmbus_channel_open_channel *openmsg; + unsigned long flags; + + result = (struct vmbus_channel_open_result *)hdr; + + /* + * Find the open msg, copy the result and signal/unblock the wait event + */ + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + requestheader = + (struct vmbus_channel_message_header *)msginfo->msg; + + if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) { + openmsg = + (struct vmbus_channel_open_channel *)msginfo->msg; + if (openmsg->child_relid == result->child_relid && + openmsg->openid == result->openid) { + memcpy(&msginfo->response.open_result, + result, + sizeof( + struct vmbus_channel_open_result)); + complete(&msginfo->waitevent); + break; + } + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + +/* + * vmbus_ongpadl_created - GPADL created handler. + * + * This is invoked when we received a response to our gpadl create request. + * Find the matching request, copy the response and signal the requesting + * thread. 
+ */ +static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_gpadl_created *gpadlcreated; + struct vmbus_channel_msginfo *msginfo; + struct vmbus_channel_message_header *requestheader; + struct vmbus_channel_gpadl_header *gpadlheader; + unsigned long flags; + + gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr; + + /* + * Find the establish msg, copy the result and signal/unblock the wait + * event + */ + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + requestheader = + (struct vmbus_channel_message_header *)msginfo->msg; + + if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) { + gpadlheader = + (struct vmbus_channel_gpadl_header *)requestheader; + + if ((gpadlcreated->child_relid == + gpadlheader->child_relid) && + (gpadlcreated->gpadl == gpadlheader->gpadl)) { + memcpy(&msginfo->response.gpadl_created, + gpadlcreated, + sizeof( + struct vmbus_channel_gpadl_created)); + complete(&msginfo->waitevent); + break; + } + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + +/* + * vmbus_ongpadl_torndown - GPADL torndown handler. + * + * This is invoked when we received a response to our gpadl teardown request. + * Find the matching request, copy the response and signal the requesting + * thread. 
+ */ +static void vmbus_ongpadl_torndown( + struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_gpadl_torndown *gpadl_torndown; + struct vmbus_channel_msginfo *msginfo; + struct vmbus_channel_message_header *requestheader; + struct vmbus_channel_gpadl_teardown *gpadl_teardown; + unsigned long flags; + + gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr; + + /* + * Find the open msg, copy the result and signal/unblock the wait event + */ + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + requestheader = + (struct vmbus_channel_message_header *)msginfo->msg; + + if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) { + gpadl_teardown = + (struct vmbus_channel_gpadl_teardown *)requestheader; + + if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) { + memcpy(&msginfo->response.gpadl_torndown, + gpadl_torndown, + sizeof( + struct vmbus_channel_gpadl_torndown)); + complete(&msginfo->waitevent); + break; + } + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + +/* + * vmbus_onversion_response - Version response handler + * + * This is invoked when we received a response to our initiate contact request. + * Find the matching request, copy the response and signal the requesting + * thread. 
+ */ +static void vmbus_onversion_response( + struct vmbus_channel_message_header *hdr) +{ + struct vmbus_channel_msginfo *msginfo; + struct vmbus_channel_message_header *requestheader; + struct vmbus_channel_initiate_contact *initiate; + struct vmbus_channel_version_response *version_response; + unsigned long flags; + + version_response = (struct vmbus_channel_version_response *)hdr; + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + + list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, + msglistentry) { + requestheader = + (struct vmbus_channel_message_header *)msginfo->msg; + + if (requestheader->msgtype == + CHANNELMSG_INITIATE_CONTACT) { + initiate = + (struct vmbus_channel_initiate_contact *)requestheader; + memcpy(&msginfo->response.version_response, + version_response, + sizeof(struct vmbus_channel_version_response)); + complete(&msginfo->waitevent); + } + } + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +} + +/* Channel message dispatch table */ +static struct vmbus_channel_message_table_entry + channel_message_table[CHANNELMSG_COUNT] = { + {CHANNELMSG_INVALID, NULL}, + {CHANNELMSG_OFFERCHANNEL, vmbus_onoffer}, + {CHANNELMSG_RESCIND_CHANNELOFFER, vmbus_onoffer_rescind}, + {CHANNELMSG_REQUESTOFFERS, NULL}, + {CHANNELMSG_ALLOFFERS_DELIVERED, vmbus_onoffers_delivered}, + {CHANNELMSG_OPENCHANNEL, NULL}, + {CHANNELMSG_OPENCHANNEL_RESULT, vmbus_onopen_result}, + {CHANNELMSG_CLOSECHANNEL, NULL}, + {CHANNELMSG_GPADL_HEADER, NULL}, + {CHANNELMSG_GPADL_BODY, NULL}, + {CHANNELMSG_GPADL_CREATED, vmbus_ongpadl_created}, + {CHANNELMSG_GPADL_TEARDOWN, NULL}, + {CHANNELMSG_GPADL_TORNDOWN, vmbus_ongpadl_torndown}, + {CHANNELMSG_RELID_RELEASED, NULL}, + {CHANNELMSG_INITIATE_CONTACT, NULL}, + {CHANNELMSG_VERSION_RESPONSE, vmbus_onversion_response}, + {CHANNELMSG_UNLOAD, NULL}, +}; + +/* + * vmbus_onmessage - Handler for channel protocol messages. + * + * This is invoked in the vmbus worker thread context. 
+ */ +void vmbus_onmessage(void *context) +{ + struct hv_message *msg = context; + struct vmbus_channel_message_header *hdr; + int size; + + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + size = msg->header.payload_size; + + if (hdr->msgtype >= CHANNELMSG_COUNT) { + pr_err("Received invalid channel message type %d size %d\n", + hdr->msgtype, size); + print_hex_dump_bytes("", DUMP_PREFIX_NONE, + (unsigned char *)msg->u.payload, size); + return; + } + + if (channel_message_table[hdr->msgtype].message_handler) + channel_message_table[hdr->msgtype].message_handler(hdr); + else + pr_err("Unhandled channel message type %d\n", hdr->msgtype); +} + +/* + * vmbus_request_offers - Send a request to get all our pending offers. + */ +int vmbus_request_offers(void) +{ + struct vmbus_channel_message_header *msg; + struct vmbus_channel_msginfo *msginfo; + int ret, t; + + msginfo = kmalloc(sizeof(*msginfo) + + sizeof(struct vmbus_channel_message_header), + GFP_KERNEL); + if (!msginfo) + return -ENOMEM; + + init_completion(&msginfo->waitevent); + + msg = (struct vmbus_channel_message_header *)msginfo->msg; + + msg->msgtype = CHANNELMSG_REQUESTOFFERS; + + + ret = vmbus_post_msg(msg, + sizeof(struct vmbus_channel_message_header)); + if (ret != 0) { + pr_err("Unable to request offers - %d\n", ret); + + goto cleanup; + } + + t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto cleanup; + } + + + +cleanup: + kfree(msginfo); + + return ret; +} + +/* eof */ diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c new file mode 100644 index 000000000000..5f438b650068 --- /dev/null +++ b/drivers/hv/connection.c @@ -0,0 +1,318 @@ +/* + * + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + + +struct vmbus_connection vmbus_connection = { + .conn_state = DISCONNECTED, + .next_gpadl_handle = ATOMIC_INIT(0xE1E10), +}; + +/* + * vmbus_connect - Sends a connect request on the partition service connection + */ +int vmbus_connect(void) +{ + int ret = 0; + int t; + struct vmbus_channel_msginfo *msginfo = NULL; + struct vmbus_channel_initiate_contact *msg; + unsigned long flags; + + /* Initialize the vmbus connection */ + vmbus_connection.conn_state = CONNECTING; + vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); + if (!vmbus_connection.work_queue) { + ret = -ENOMEM; + goto cleanup; + } + + INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); + spin_lock_init(&vmbus_connection.channelmsg_lock); + + INIT_LIST_HEAD(&vmbus_connection.chn_list); + spin_lock_init(&vmbus_connection.channel_lock); + + /* + * Setup the vmbus event connection for channel interrupt + * abstraction stuff + */ + vmbus_connection.int_page = + (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); + if (vmbus_connection.int_page == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + vmbus_connection.recv_int_page = vmbus_connection.int_page; + 
vmbus_connection.send_int_page = + (void *)((unsigned long)vmbus_connection.int_page + + (PAGE_SIZE >> 1)); + + /* + * Setup the monitor notification facility. The 1st page for + * parent->child and the 2nd page for child->parent + */ + vmbus_connection.monitor_pages = + (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 1); + if (vmbus_connection.monitor_pages == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + msginfo = kzalloc(sizeof(*msginfo) + + sizeof(struct vmbus_channel_initiate_contact), + GFP_KERNEL); + if (msginfo == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + init_completion(&msginfo->waitevent); + + msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; + + msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; + msg->vmbus_version_requested = VMBUS_REVISION_NUMBER; + msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); + msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages); + msg->monitor_page2 = virt_to_phys( + (void *)((unsigned long)vmbus_connection.monitor_pages + + PAGE_SIZE)); + + /* + * Add to list before we send the request since we may + * receive the response before returning from this routine + */ + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_add_tail(&msginfo->msglistentry, + &vmbus_connection.chn_msg_list); + + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + ret = vmbus_post_msg(msg, + sizeof(struct vmbus_channel_initiate_contact)); + if (ret != 0) { + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&msginfo->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, + flags); + goto cleanup; + } + + /* Wait for the connection response */ + t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); + if (t == 0) { + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, + flags); + list_del(&msginfo->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, + flags); + ret = -ETIMEDOUT; + goto cleanup; + } + + 
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&msginfo->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + + /* Check if successful */ + if (msginfo->response.version_response.version_supported) { + vmbus_connection.conn_state = CONNECTED; + } else { + pr_err("Unable to connect, " + "Version %d not supported by Hyper-V\n", + VMBUS_REVISION_NUMBER); + ret = -ECONNREFUSED; + goto cleanup; + } + + kfree(msginfo); + return 0; + +cleanup: + vmbus_connection.conn_state = DISCONNECTED; + + if (vmbus_connection.work_queue) + destroy_workqueue(vmbus_connection.work_queue); + + if (vmbus_connection.int_page) { + free_pages((unsigned long)vmbus_connection.int_page, 0); + vmbus_connection.int_page = NULL; + } + + if (vmbus_connection.monitor_pages) { + free_pages((unsigned long)vmbus_connection.monitor_pages, 1); + vmbus_connection.monitor_pages = NULL; + } + + kfree(msginfo); + + return ret; +} + + +/* + * relid2channel - Get the channel object given its + * child relative id (ie channel id) + */ +struct vmbus_channel *relid2channel(u32 relid) +{ + struct vmbus_channel *channel; + struct vmbus_channel *found_channel = NULL; + unsigned long flags; + + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (channel->offermsg.child_relid == relid) { + found_channel = channel; + break; + } + } + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); + + return found_channel; +} + +/* + * process_chn_event - Process a channel event notification + */ +static void process_chn_event(u32 relid) +{ + struct vmbus_channel *channel; + unsigned long flags; + + /* + * Find the channel based on this relid and invokes the + * channel callback to process the event + */ + channel = relid2channel(relid); + + if (!channel) { + pr_err("channel not found for relid - %u\n", relid); + return; + } + + /* + * A channel once created is persistent even when 
there + * is no driver handling the device. An unloading driver + * sets the onchannel_callback to NULL under the + * protection of the channel inbound_lock. Thus, checking + * and invoking the driver specific callback takes care of + * orderly unloading of the driver. + */ + + spin_lock_irqsave(&channel->inbound_lock, flags); + if (channel->onchannel_callback != NULL) + channel->onchannel_callback(channel->channel_callback_context); + else + pr_err("no channel callback for relid - %u\n", relid); + + spin_unlock_irqrestore(&channel->inbound_lock, flags); +} + +/* + * vmbus_on_event - Handler for events + */ +void vmbus_on_event(unsigned long data) +{ + u32 dword; + u32 maxdword = MAX_NUM_CHANNELS_SUPPORTED >> 5; + int bit; + u32 relid; + u32 *recv_int_page = vmbus_connection.recv_int_page; + + /* Check events */ + if (!recv_int_page) + return; + for (dword = 0; dword < maxdword; dword++) { + if (!recv_int_page[dword]) + continue; + for (bit = 0; bit < 32; bit++) { + if (sync_test_and_clear_bit(bit, + (unsigned long *)&recv_int_page[dword])) { + relid = (dword << 5) + bit; + + if (relid == 0) + /* + * Special case - vmbus + * channel protocol msg + */ + continue; + + process_chn_event(relid); + } + } + } +} + +/* + * vmbus_post_msg - Send a msg on the vmbus's message connection + */ +int vmbus_post_msg(void *buffer, size_t buflen) +{ + union hv_connection_id conn_id; + int ret = 0; + int retries = 0; + + conn_id.asu32 = 0; + conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID; + + /* + * hv_post_message() can have transient failures because of + * insufficient resources. Retry the operation a couple of + * times before giving up. 
+ */ + while (retries < 3) { + ret = hv_post_message(conn_id, 1, buffer, buflen); + if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) + return ret; + retries++; + msleep(100); + } + return ret; +} + +/* + * vmbus_set_event - Send an event notification to the parent + */ +int vmbus_set_event(u32 child_relid) +{ + /* Each u32 represents 32 channels */ + sync_set_bit(child_relid & 31, + (unsigned long *)vmbus_connection.send_int_page + + (child_relid >> 5)); + + return hv_signal_event(); +} diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c new file mode 100644 index 000000000000..931b7b030784 --- /dev/null +++ b/drivers/hv/hv.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + +/* The one and only */ +struct hv_context hv_context = { + .synic_initialized = false, + .hypercall_page = NULL, + .signal_event_param = NULL, + .signal_event_buffer = NULL, +}; + +/* + * query_hypervisor_presence + * - Query the cpuid for presence of windows hypervisor + */ +static int query_hypervisor_presence(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + unsigned int op; + + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HVCPUID_VERSION_FEATURES; + cpuid(op, &eax, &ebx, &ecx, &edx); + + return ecx & HV_PRESENT_BIT; +} + +/* + * query_hypervisor_info - Get version info of the windows hypervisor + */ +static int query_hypervisor_info(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + unsigned int max_leaf; + unsigned int op; + + /* + * Its assumed that this is called after confirming that Viridian + * is present. Query id and revision. + */ + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HVCPUID_VENDOR_MAXFUNCTION; + cpuid(op, &eax, &ebx, &ecx, &edx); + + max_leaf = eax; + + if (max_leaf >= HVCPUID_VERSION) { + eax = 0; + ebx = 0; + ecx = 0; + edx = 0; + op = HVCPUID_VERSION; + cpuid(op, &eax, &ebx, &ecx, &edx); + pr_info("Hyper-V Host OS Build:%d-%d.%d-%d-%d.%d\n", + eax, + ebx >> 16, + ebx & 0xFFFF, + ecx, + edx >> 24, + edx & 0xFFFFFF); + } + return max_leaf; +} + +/* + * do_hypercall- Invoke the specified hypercall + */ +static u64 do_hypercall(u64 control, void *input, void *output) +{ +#ifdef CONFIG_X86_64 + u64 hv_status = 0; + u64 input_address = (input) ? virt_to_phys(input) : 0; + u64 output_address = (output) ? 
virt_to_phys(output) : 0; + void *hypercall_page = hv_context.hypercall_page; + + __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); + __asm__ __volatile__("call *%3" : "=a" (hv_status) : + "c" (control), "d" (input_address), + "m" (hypercall_page)); + + return hv_status; + +#else + + u32 control_hi = control >> 32; + u32 control_lo = control & 0xFFFFFFFF; + u32 hv_status_hi = 1; + u32 hv_status_lo = 1; + u64 input_address = (input) ? virt_to_phys(input) : 0; + u32 input_address_hi = input_address >> 32; + u32 input_address_lo = input_address & 0xFFFFFFFF; + u64 output_address = (output) ? virt_to_phys(output) : 0; + u32 output_address_hi = output_address >> 32; + u32 output_address_lo = output_address & 0xFFFFFFFF; + void *hypercall_page = hv_context.hypercall_page; + + __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), + "=a"(hv_status_lo) : "d" (control_hi), + "a" (control_lo), "b" (input_address_hi), + "c" (input_address_lo), "D"(output_address_hi), + "S"(output_address_lo), "m" (hypercall_page)); + + return hv_status_lo | ((u64)hv_status_hi << 32); +#endif /* !x86_64 */ +} + +/* + * hv_init - Main initialization routine. 
+ * + * This routine must be called before any other routines in here are called + */ +int hv_init(void) +{ + int max_leaf; + union hv_x64_msr_hypercall_contents hypercall_msr; + void *virtaddr = NULL; + + memset(hv_context.synic_event_page, 0, sizeof(void *) * MAX_NUM_CPUS); + memset(hv_context.synic_message_page, 0, + sizeof(void *) * MAX_NUM_CPUS); + + if (!query_hypervisor_presence()) + goto cleanup; + + max_leaf = query_hypervisor_info(); + + rdmsrl(HV_X64_MSR_GUEST_OS_ID, hv_context.guestid); + + if (hv_context.guestid != 0) + goto cleanup; + + /* Write our OS info */ + wrmsrl(HV_X64_MSR_GUEST_OS_ID, HV_LINUX_GUEST_ID); + hv_context.guestid = HV_LINUX_GUEST_ID; + + /* See if the hypercall page is already set */ + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); + + if (!virtaddr) + goto cleanup; + + hypercall_msr.enable = 1; + + hypercall_msr.guest_physical_address = vmalloc_to_pfn(virtaddr); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + /* Confirm that hypercall page did get setup. 
*/ + hypercall_msr.as_uint64 = 0; + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + if (!hypercall_msr.enable) + goto cleanup; + + hv_context.hypercall_page = virtaddr; + + /* Setup the global signal event param for the signal event hypercall */ + hv_context.signal_event_buffer = + kmalloc(sizeof(struct hv_input_signal_event_buffer), + GFP_KERNEL); + if (!hv_context.signal_event_buffer) + goto cleanup; + + hv_context.signal_event_param = + (struct hv_input_signal_event *) + (ALIGN((unsigned long) + hv_context.signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + hv_context.signal_event_param->connectionid.asu32 = 0; + hv_context.signal_event_param->connectionid.u.id = + VMBUS_EVENT_CONNECTION_ID; + hv_context.signal_event_param->flag_number = 0; + hv_context.signal_event_param->rsvdz = 0; + + return 0; + +cleanup: + if (virtaddr) { + if (hypercall_msr.enable) { + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + } + + vfree(virtaddr); + } + + return -ENOTSUPP; +} + +/* + * hv_cleanup - Cleanup routine. + * + * This routine is called normally during driver unloading or exiting. + */ +void hv_cleanup(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + kfree(hv_context.signal_event_buffer); + hv_context.signal_event_buffer = NULL; + hv_context.signal_event_param = NULL; + + if (hv_context.hypercall_page) { + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + vfree(hv_context.hypercall_page); + hv_context.hypercall_page = NULL; + } +} + +/* + * hv_post_message - Post a message using the hypervisor message IPC. + * + * This involves a hypercall. 
+ */ +u16 hv_post_message(union hv_connection_id connection_id, + enum hv_message_type message_type, + void *payload, size_t payload_size) +{ + struct aligned_input { + u64 alignment8; + struct hv_input_post_message msg; + }; + + struct hv_input_post_message *aligned_msg; + u16 status; + unsigned long addr; + + if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) + return -EMSGSIZE; + + addr = (unsigned long)kmalloc(sizeof(struct aligned_input), GFP_ATOMIC); + if (!addr) + return -ENOMEM; + + aligned_msg = (struct hv_input_post_message *) + (ALIGN(addr, HV_HYPERCALL_PARAM_ALIGN)); + + aligned_msg->connectionid = connection_id; + aligned_msg->message_type = message_type; + aligned_msg->payload_size = payload_size; + memcpy((void *)aligned_msg->payload, payload, payload_size); + + status = do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL) + & 0xFFFF; + + kfree((void *)addr); + + return status; +} + + +/* + * hv_signal_event - + * Signal an event on the specified connection using the hypervisor event IPC. + * + * This involves a hypercall. + */ +u16 hv_signal_event(void) +{ + u16 status; + + status = do_hypercall(HVCALL_SIGNAL_EVENT, + hv_context.signal_event_param, + NULL) & 0xFFFF; + return status; +} + +/* + * hv_synic_init - Initialize the Synthethic Interrupt Controller. + * + * If it is already initialized by another entity (ie x2v shim), we need to + * retrieve the initialized message and event pages. Otherwise, we create and + * initialize the message and event pages. 
+ */ +void hv_synic_init(void *irqarg) +{ + u64 version; + union hv_synic_simp simp; + union hv_synic_siefp siefp; + union hv_synic_sint shared_sint; + union hv_synic_scontrol sctrl; + + u32 irq_vector = *((u32 *)(irqarg)); + int cpu = smp_processor_id(); + + if (!hv_context.hypercall_page) + return; + + /* Check the version */ + rdmsrl(HV_X64_MSR_SVERSION, version); + + hv_context.synic_message_page[cpu] = + (void *)get_zeroed_page(GFP_ATOMIC); + + if (hv_context.synic_message_page[cpu] == NULL) { + pr_err("Unable to allocate SYNIC message page\n"); + goto cleanup; + } + + hv_context.synic_event_page[cpu] = + (void *)get_zeroed_page(GFP_ATOMIC); + + if (hv_context.synic_event_page[cpu] == NULL) { + pr_err("Unable to allocate SYNIC event page\n"); + goto cleanup; + } + + /* Setup the Synic's message page */ + rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + simp.simp_enabled = 1; + simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu]) + >> PAGE_SHIFT; + + wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + + /* Setup the Synic's event page */ + rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + siefp.siefp_enabled = 1; + siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu]) + >> PAGE_SHIFT; + + wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + + /* Setup the shared SINT. 
*/ + rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + shared_sint.as_uint64 = 0; + shared_sint.vector = irq_vector; /* HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.masked = false; + shared_sint.auto_eoi = false; + + wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + /* Enable the global synic bit */ + rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + sctrl.enable = 1; + + wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + + hv_context.synic_initialized = true; + return; + +cleanup: + if (hv_context.synic_event_page[cpu]) + free_page((unsigned long)hv_context.synic_event_page[cpu]); + + if (hv_context.synic_message_page[cpu]) + free_page((unsigned long)hv_context.synic_message_page[cpu]); + return; +} + +/* + * hv_synic_cleanup - Cleanup routine for hv_synic_init(). + */ +void hv_synic_cleanup(void *arg) +{ + union hv_synic_sint shared_sint; + union hv_synic_simp simp; + union hv_synic_siefp siefp; + int cpu = smp_processor_id(); + + if (!hv_context.synic_initialized) + return; + + rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + shared_sint.masked = 1; + + /* Need to correctly cleanup in the case of SMP!!! */ + /* Disable the interrupt */ + wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + + rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + simp.simp_enabled = 0; + simp.base_simp_gpa = 0; + + wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + + rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + siefp.siefp_enabled = 0; + siefp.base_siefp_gpa = 0; + + wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + + free_page((unsigned long)hv_context.synic_message_page[cpu]); + free_page((unsigned long)hv_context.synic_event_page[cpu]); +} diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c new file mode 100644 index 000000000000..69c4c985daeb --- /dev/null +++ b/drivers/hv/hv_kvp.c @@ -0,0 +1,339 @@ +/* + * An implementation of key value pair (KVP) functionality for Linux. 
+ * + * + * Copyright (C) 2010, Novell, Inc. + * Author : K. Y. Srinivasan <ksrinivasan@novell.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/nls.h> +#include <linux/connector.h> +#include <linux/workqueue.h> +#include <linux/hyperv.h> + +#include "hv_kvp.h" + + + +/* + * Global state maintained for transaction that is being processed. + * Note that only one transaction can be active at any point in time. + * + * This state is set when we receive a request from the host; we + * cleanup this state when the transaction is completed - when we respond + * to the host with the key value. + */ + +static struct { + bool active; /* transaction status - active or not */ + int recv_len; /* number of bytes received. */ + int index; /* current index */ + struct vmbus_channel *recv_channel; /* chn we got the request */ + u64 recv_req_id; /* request ID. 
*/ +} kvp_transaction; + +static void kvp_send_key(struct work_struct *dummy); + +#define TIMEOUT_FIRED 1 + +static void kvp_respond_to_host(char *key, char *value, int error); +static void kvp_work_func(struct work_struct *dummy); +static void kvp_register(void); + +static DECLARE_DELAYED_WORK(kvp_work, kvp_work_func); +static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); + +static struct cb_id kvp_id = { CN_KVP_IDX, CN_KVP_VAL }; +static const char kvp_name[] = "kvp_kernel_module"; +static u8 *recv_buffer; +/* + * Register the kernel component with the user-level daemon. + * As part of this registration, pass the LIC version number. + */ + +static void +kvp_register(void) +{ + + struct cn_msg *msg; + + msg = kzalloc(sizeof(*msg) + strlen(HV_DRV_VERSION) + 1 , GFP_ATOMIC); + + if (msg) { + msg->id.idx = CN_KVP_IDX; + msg->id.val = CN_KVP_VAL; + msg->seq = KVP_REGISTER; + strcpy(msg->data, HV_DRV_VERSION); + msg->len = strlen(HV_DRV_VERSION) + 1; + cn_netlink_send(msg, 0, GFP_ATOMIC); + kfree(msg); + } +} +static void +kvp_work_func(struct work_struct *dummy) +{ + /* + * If the timer fires, the user-mode component has not responded; + * process the pending transaction. + */ + kvp_respond_to_host("Unknown key", "Guest timed out", TIMEOUT_FIRED); +} + +/* + * Callback when data is received from user mode. + */ + +static void +kvp_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) +{ + struct hv_ku_msg *message; + + message = (struct hv_ku_msg *)msg->data; + if (msg->seq == KVP_REGISTER) { + pr_info("KVP: user-mode registering done.\n"); + kvp_register(); + } + + if (msg->seq == KVP_USER_SET) { + /* + * Complete the transaction by forwarding the key value + * to the host. But first, cancel the timeout. 
+ */ + if (cancel_delayed_work_sync(&kvp_work)) + kvp_respond_to_host(message->kvp_key, + message->kvp_value, + !strlen(message->kvp_key)); + } +} + +static void +kvp_send_key(struct work_struct *dummy) +{ + struct cn_msg *msg; + int index = kvp_transaction.index; + + msg = kzalloc(sizeof(*msg) + sizeof(struct hv_kvp_msg) , GFP_ATOMIC); + + if (msg) { + msg->id.idx = CN_KVP_IDX; + msg->id.val = CN_KVP_VAL; + msg->seq = KVP_KERNEL_GET; + ((struct hv_ku_msg *)msg->data)->kvp_index = index; + msg->len = sizeof(struct hv_ku_msg); + cn_netlink_send(msg, 0, GFP_ATOMIC); + kfree(msg); + } + return; +} + +/* + * Send a response back to the host. + */ + +static void +kvp_respond_to_host(char *key, char *value, int error) +{ + struct hv_kvp_msg *kvp_msg; + struct hv_kvp_msg_enumerate *kvp_data; + char *key_name; + struct icmsg_hdr *icmsghdrp; + int keylen, valuelen; + u32 buf_len; + struct vmbus_channel *channel; + u64 req_id; + + /* + * If a transaction is not active; log and return. + */ + + if (!kvp_transaction.active) { + /* + * This is a spurious call! + */ + pr_warn("KVP: Transaction not active\n"); + return; + } + /* + * Copy the global state for completing the transaction. Note that + * only one transaction can be active at a time. + */ + + buf_len = kvp_transaction.recv_len; + channel = kvp_transaction.recv_channel; + req_id = kvp_transaction.recv_req_id; + + kvp_transaction.active = false; + + if (channel->onchannel_callback == NULL) + /* + * We have raced with util driver being unloaded; + * silently return. + */ + return; + + icmsghdrp = (struct icmsg_hdr *) + &recv_buffer[sizeof(struct vmbuspipe_hdr)]; + kvp_msg = (struct hv_kvp_msg *) + &recv_buffer[sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + kvp_data = &kvp_msg->kvp_data; + key_name = key; + + /* + * If the error parameter is set, terminate the host's enumeration. 
+ */ + if (error) { + /* + * We don't support this index or the we have timedout; + * terminate the host-side iteration by returning an error. + */ + icmsghdrp->status = HV_E_FAIL; + goto response_done; + } + + /* + * The windows host expects the key/value pair to be encoded + * in utf16. + */ + keylen = utf8s_to_utf16s(key_name, strlen(key_name), + (wchar_t *)kvp_data->data.key); + kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */ + valuelen = utf8s_to_utf16s(value, strlen(value), + (wchar_t *)kvp_data->data.value); + kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */ + + kvp_data->data.value_type = REG_SZ; /* all our values are strings */ + icmsghdrp->status = HV_S_OK; + +response_done: + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, recv_buffer, buf_len, req_id, + VM_PKT_DATA_INBAND, 0); + +} + +/* + * This callback is invoked when we get a KVP message from the host. + * The host ensures that only one KVP transaction can be active at a time. + * KVP implementation in Linux needs to forward the key to a user-mde + * component to retrive the corresponding value. Consequently, we cannot + * respond to the host in the conext of this callback. Since the host + * guarantees that at most only one transaction can be active at a time, + * we stash away the transaction state in a set of global variables. 
+ */ + +void hv_kvp_onchannelcallback(void *context) +{ + struct vmbus_channel *channel = context; + u32 recvlen; + u64 requestid; + + struct hv_kvp_msg *kvp_msg; + struct hv_kvp_msg_enumerate *kvp_data; + + struct icmsg_hdr *icmsghdrp; + struct icmsg_negotiate *negop = NULL; + + + vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE, &recvlen, &requestid); + + if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + prep_negotiate_resp(icmsghdrp, negop, recv_buffer); + } else { + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + kvp_data = &kvp_msg->kvp_data; + + /* + * We only support the "get" operation on + * "KVP_POOL_AUTO" pool. + */ + + if ((kvp_msg->kvp_hdr.pool != KVP_POOL_AUTO) || + (kvp_msg->kvp_hdr.operation != + KVP_OP_ENUMERATE)) { + icmsghdrp->status = HV_E_FAIL; + goto callback_done; + } + + /* + * Stash away this global state for completing the + * transaction; note transactions are serialized. + */ + kvp_transaction.recv_len = recvlen; + kvp_transaction.recv_channel = channel; + kvp_transaction.recv_req_id = requestid; + kvp_transaction.active = true; + kvp_transaction.index = kvp_data->index; + + /* + * Get the information from the + * user-mode component. + * component. This transaction will be + * completed when we get the value from + * the user-mode component. + * Set a timeout to deal with + * user-mode not responding. 
+ */ + schedule_work(&kvp_sendkey_work); + schedule_delayed_work(&kvp_work, 5*HZ); + + return; + + } + +callback_done: + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, recv_buffer, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + } + +} + +int +hv_kvp_init(struct hv_util_service *srv) +{ + int err; + + err = cn_add_callback(&kvp_id, kvp_name, kvp_cn_callback); + if (err) + return err; + recv_buffer = srv->recv_buffer; + + return 0; +} + +void hv_kvp_deinit(void) +{ + cn_del_callback(&kvp_id); + cancel_delayed_work_sync(&kvp_work); + cancel_work_sync(&kvp_sendkey_work); +} diff --git a/drivers/hv/hv_kvp.h b/drivers/hv/hv_kvp.h new file mode 100644 index 000000000000..9b765d7df838 --- /dev/null +++ b/drivers/hv/hv_kvp.h @@ -0,0 +1,184 @@ +/* + * An implementation of HyperV key value pair (KVP) functionality for Linux. + * + * + * Copyright (C) 2010, Novell, Inc. + * Author : K. Y. Srinivasan <ksrinivasan@novell.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _KVP_H +#define _KVP_H_ + +/* + * Maximum value size - used for both key names and value data, and includes + * any applicable NULL terminators. 
+ * + * Note: This limit is somewhat arbitrary, but falls easily within what is + * supported for all native guests (back to Win 2000) and what is reasonable + * for the IC KVP exchange functionality. Note that Windows Me/98/95 are + * limited to 255 character key names. + * + * MSDN recommends not storing data values larger than 2048 bytes in the + * registry. + * + * Note: This value is used in defining the KVP exchange message - this value + * cannot be modified without affecting the message size and compatibility. + */ + +/* + * bytes, including any null terminators + */ +#define HV_KVP_EXCHANGE_MAX_VALUE_SIZE (2048) + + +/* + * Maximum key size - the registry limit for the length of an entry name + * is 256 characters, including the null terminator + */ + +#define HV_KVP_EXCHANGE_MAX_KEY_SIZE (512) + +/* + * In Linux, we implement the KVP functionality in two components: + * 1) The kernel component which is packaged as part of the hv_utils driver + * is responsible for communicating with the host and responsible for + * implementing the host/guest protocol. 2) A user level daemon that is + * responsible for data gathering. + * + * Host/Guest Protocol: The host iterates over an index and expects the guest + * to assign a key name to the index and also return the value corresponding to + * the key. The host will have at most one KVP transaction outstanding at any + * given point in time. The host side iteration stops when the guest returns + * an error. Microsoft has specified the following mapping of key names to + * host specified index: + * + * Index Key Name + * 0 FullyQualifiedDomainName + * 1 IntegrationServicesVersion + * 2 NetworkAddressIPv4 + * 3 NetworkAddressIPv6 + * 4 OSBuildNumber + * 5 OSName + * 6 OSMajorVersion + * 7 OSMinorVersion + * 8 OSVersion + * 9 ProcessorArchitecture + * + * The Windows host expects the Key Name and Key Value to be encoded in utf16. 
+ * + * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the + * data gathering functionality in a user mode daemon. The user level daemon + * is also responsible for binding the key name to the index as well. The + * kernel and user-level daemon communicate using a connector channel. + * + * The user mode component first registers with the + * kernel component. Subsequently, the kernel component requests data + * for the specified keys. In response to this message the user mode component + * fills in the value corresponding to the specified key. We overload the + * sequence field in the cn_msg header to define our KVP message types. + * + * + * The kernel component simply acts as a conduit for communication between the + * Windows host and the user-level daemon. The kernel component passes up the + * index received from the Host to the user-level daemon. If the index is + * valid (supported), the corresponding key as well as its + * value (both are strings) is returned. If the index is invalid + * (not supported), an empty key string is returned. + */ + +/* + * + * The following definitions are shared with the user-mode component; do not + * change any of this without making the corresponding changes in + * the KVP user-mode component. + */ + +#define CN_KVP_VAL 0x1 /* This supports queries from the kernel */ +#define CN_KVP_USER_VAL 0x2 /* This supports queries from the user */ + +enum hv_ku_op { + KVP_REGISTER = 0, /* Register the user mode component */ + KVP_KERNEL_GET, /* Kernel is requesting the value */ + KVP_KERNEL_SET, /* Kernel is providing the value */ + KVP_USER_GET, /* User is requesting the value */ + KVP_USER_SET /* User is providing the value */ +}; + +struct hv_ku_msg { + __u32 kvp_index; /* Key index */ + __u8 kvp_key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; /* Key name */ + __u8 kvp_value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; /* Key value */ +}; + + + + +#ifdef __KERNEL__ + +/* + * Registry value types. 
+ */ + +#define REG_SZ 1 + +enum hv_kvp_exchg_op { + KVP_OP_GET = 0, + KVP_OP_SET, + KVP_OP_DELETE, + KVP_OP_ENUMERATE, + KVP_OP_COUNT /* Number of operations, must be last. */ +}; + +enum hv_kvp_exchg_pool { + KVP_POOL_EXTERNAL = 0, + KVP_POOL_GUEST, + KVP_POOL_AUTO, + KVP_POOL_AUTO_EXTERNAL, + KVP_POOL_AUTO_INTERNAL, + KVP_POOL_COUNT /* Number of pools, must be last. */ +}; + +struct hv_kvp_hdr { + u8 operation; + u8 pool; +}; + +struct hv_kvp_exchg_msg_value { + u32 value_type; + u32 key_size; + u32 value_size; + u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; + u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; +}; + +struct hv_kvp_msg_enumerate { + u32 index; + struct hv_kvp_exchg_msg_value data; +}; + +struct hv_kvp_msg { + struct hv_kvp_hdr kvp_hdr; + struct hv_kvp_msg_enumerate kvp_data; +}; + +int hv_kvp_init(struct hv_util_service *); +void hv_kvp_deinit(void); +void hv_kvp_onchannelcallback(void *); + +#endif /* __KERNEL__ */ +#endif /* _KVP_H */ + diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c new file mode 100644 index 000000000000..e0e3a6d0244d --- /dev/null +++ b/drivers/hv/hv_util.c @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2010, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sysctl.h> +#include <linux/reboot.h> +#include <linux/hyperv.h> + +#include "hv_kvp.h" + + +static void shutdown_onchannelcallback(void *context); +static struct hv_util_service util_shutdown = { + .util_cb = shutdown_onchannelcallback, +}; + +static void timesync_onchannelcallback(void *context); +static struct hv_util_service util_timesynch = { + .util_cb = timesync_onchannelcallback, +}; + +static void heartbeat_onchannelcallback(void *context); +static struct hv_util_service util_heartbeat = { + .util_cb = heartbeat_onchannelcallback, +}; + +static struct hv_util_service util_kvp = { + .util_cb = hv_kvp_onchannelcallback, + .util_init = hv_kvp_init, + .util_deinit = hv_kvp_deinit, +}; + +static void shutdown_onchannelcallback(void *context) +{ + struct vmbus_channel *channel = context; + u32 recvlen; + u64 requestid; + u8 execute_shutdown = false; + u8 *shut_txf_buf = util_shutdown.recv_buffer; + + struct shutdown_msg_data *shutdown_msg; + + struct icmsg_hdr *icmsghdrp; + struct icmsg_negotiate *negop = NULL; + + vmbus_recvpacket(channel, shut_txf_buf, + PAGE_SIZE, &recvlen, &requestid); + + if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[ + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + prep_negotiate_resp(icmsghdrp, negop, shut_txf_buf); + } else { + shutdown_msg = + (struct shutdown_msg_data *)&shut_txf_buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + switch (shutdown_msg->flags) { + case 0: + case 1: + icmsghdrp->status = HV_S_OK; + execute_shutdown = true; + + pr_info("Shutdown request received -" + " graceful shutdown initiated\n"); + break; + default: + icmsghdrp->status = HV_E_FAIL; + execute_shutdown = 
false; + + pr_info("Shutdown request received -" + " Invalid request\n"); + break; + } + } + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, shut_txf_buf, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + } + + if (execute_shutdown == true) + orderly_poweroff(true); +} + +/* + * Set guest time to host UTC time. + */ +static inline void do_adj_guesttime(u64 hosttime) +{ + s64 host_tns; + struct timespec host_ts; + + host_tns = (hosttime - WLTIMEDELTA) * 100; + host_ts = ns_to_timespec(host_tns); + + do_settimeofday(&host_ts); +} + +/* + * Set the host time in a process context. + */ + +struct adj_time_work { + struct work_struct work; + u64 host_time; +}; + +static void hv_set_host_time(struct work_struct *work) +{ + struct adj_time_work *wrk; + + wrk = container_of(work, struct adj_time_work, work); + do_adj_guesttime(wrk->host_time); + kfree(wrk); +} + +/* + * Synchronize time with host after reboot, restore, etc. + * + * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. + * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time + * message after the timesync channel is opened. Since the hv_utils module is + * loaded after hv_vmbus, the first message is usually missed. The other + * thing is, systime is automatically set to emulated hardware clock which may + * not be UTC time or in the same time zone. So, to override these effects, we + * use the first 50 time samples for initial system time setting. 
+ */ +static inline void adj_guesttime(u64 hosttime, u8 flags) +{ + struct adj_time_work *wrk; + static s32 scnt = 50; + + wrk = kmalloc(sizeof(struct adj_time_work), GFP_ATOMIC); + if (wrk == NULL) + return; + + wrk->host_time = hosttime; + if ((flags & ICTIMESYNCFLAG_SYNC) != 0) { + INIT_WORK(&wrk->work, hv_set_host_time); + schedule_work(&wrk->work); + return; + } + + if ((flags & ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) { + scnt--; + INIT_WORK(&wrk->work, hv_set_host_time); + schedule_work(&wrk->work); + } else + kfree(wrk); +} + +/* + * Time Sync Channel message handler. + */ +static void timesync_onchannelcallback(void *context) +{ + struct vmbus_channel *channel = context; + u32 recvlen; + u64 requestid; + struct icmsg_hdr *icmsghdrp; + struct ictimesync_data *timedatap; + u8 *time_txf_buf = util_timesynch.recv_buffer; + + vmbus_recvpacket(channel, time_txf_buf, + PAGE_SIZE, &recvlen, &requestid); + + if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[ + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + prep_negotiate_resp(icmsghdrp, NULL, time_txf_buf); + } else { + timedatap = (struct ictimesync_data *)&time_txf_buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + adj_guesttime(timedatap->parenttime, timedatap->flags); + } + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, time_txf_buf, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + } +} + +/* + * Heartbeat functionality. + * Every two seconds, Hyper-V send us a heartbeat request message. + * we respond to this message, and Hyper-V knows we are alive. 
+ */ +static void heartbeat_onchannelcallback(void *context) +{ + struct vmbus_channel *channel = context; + u32 recvlen; + u64 requestid; + struct icmsg_hdr *icmsghdrp; + struct heartbeat_msg_data *heartbeat_msg; + u8 *hbeat_txf_buf = util_heartbeat.recv_buffer; + + vmbus_recvpacket(channel, hbeat_txf_buf, + PAGE_SIZE, &recvlen, &requestid); + + if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[ + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { + prep_negotiate_resp(icmsghdrp, NULL, hbeat_txf_buf); + } else { + heartbeat_msg = + (struct heartbeat_msg_data *)&hbeat_txf_buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; + + heartbeat_msg->seq_num += 1; + } + + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION + | ICMSGHDRFLAG_RESPONSE; + + vmbus_sendpacket(channel, hbeat_txf_buf, + recvlen, requestid, + VM_PKT_DATA_INBAND, 0); + } +} + +static int util_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) +{ + struct hv_util_service *srv = + (struct hv_util_service *)dev_id->driver_data; + int ret; + + srv->recv_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!srv->recv_buffer) + return -ENOMEM; + if (srv->util_init) { + ret = srv->util_init(srv); + if (ret) { + ret = -ENODEV; + goto error1; + } + } + + ret = vmbus_open(dev->channel, 2 * PAGE_SIZE, 2 * PAGE_SIZE, NULL, 0, + srv->util_cb, dev->channel); + if (ret) + goto error; + + hv_set_drvdata(dev, srv); + return 0; + +error: + if (srv->util_deinit) + srv->util_deinit(); +error1: + kfree(srv->recv_buffer); + return ret; +} + +static int util_remove(struct hv_device *dev) +{ + struct hv_util_service *srv = hv_get_drvdata(dev); + + vmbus_close(dev->channel); + if (srv->util_deinit) + srv->util_deinit(); + kfree(srv->recv_buffer); + + return 0; +} + +static const struct hv_vmbus_device_id id_table[] = { + /* Shutdown guid */ + { VMBUS_DEVICE(0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, + 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 
0xDB) + .driver_data = (unsigned long)&util_shutdown }, + /* Time synch guid */ + { VMBUS_DEVICE(0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, + 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf) + .driver_data = (unsigned long)&util_timesynch }, + /* Heartbeat guid */ + { VMBUS_DEVICE(0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, + 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d) + .driver_data = (unsigned long)&util_heartbeat }, + /* KVP guid */ + { VMBUS_DEVICE(0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, + 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6) + .driver_data = (unsigned long)&util_kvp }, + { }, +}; + +MODULE_DEVICE_TABLE(vmbus, id_table); + +/* The one and only one */ +static struct hv_driver util_drv = { + .name = "hv_util", + .id_table = id_table, + .probe = util_probe, + .remove = util_remove, +}; + +static int __init init_hyperv_utils(void) +{ + pr_info("Registering HyperV Utility Driver\n"); + + return vmbus_driver_register(&util_drv); +} + +static void exit_hyperv_utils(void) +{ + pr_info("De-Registered HyperV Utility Driver\n"); + + vmbus_driver_unregister(&util_drv); +} + +module_init(init_hyperv_utils); +module_exit(exit_hyperv_utils); + +MODULE_DESCRIPTION("Hyper-V Utilities"); +MODULE_VERSION(HV_DRV_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h new file mode 100644 index 000000000000..8261cb64931b --- /dev/null +++ b/drivers/hv/hyperv_vmbus.h @@ -0,0 +1,628 @@ +/* + * + * Copyright (c) 2011, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * K. Y. Srinivasan <kys@microsoft.com> + * + */ + +#ifndef _HYPERV_VMBUS_H +#define _HYPERV_VMBUS_H + +#include <linux/list.h> +#include <asm/sync_bitops.h> +#include <linux/atomic.h> +#include <linux/hyperv.h> + +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HVCPUID_VERSION_FEATURES). + */ +enum hv_cpuid_function { + HVCPUID_VERSION_FEATURES = 0x00000001, + HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, + HVCPUID_INTERFACE = 0x40000001, + + /* + * The remaining functions depend on the value of + * HVCPUID_INTERFACE + */ + HVCPUID_VERSION = 0x40000002, + HVCPUID_FEATURES = 0x40000003, + HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, + HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, +}; + +/* Define version of the synthetic interrupt controller. */ +#define HV_SYNIC_VERSION (1) + +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) + +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) +#define HV_ANY_VP (0xFFFFFFFF) + +/* Define synthetic interrupt controller flag constants. */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) +#define HV_EVENT_FLAGS_BYTE_COUNT (256) +#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. 
*/ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define invalid partition identifier. */ +#define HV_PARTITION_ID_INVALID ((u64)0x0) + +/* Define connection identifier type. */ +union hv_connection_id { + u32 asu32; + struct { + u32 id:24; + u32 reserved:8; + } u; +}; + +/* Define port identifier type. */ +union hv_port_id { + u32 asu32; + struct { + u32 id:24; + u32 reserved:8; + } u ; +}; + +/* Define port type. */ +enum hv_port_type { + HVPORT_MSG = 1, + HVPORT_EVENT = 2, + HVPORT_MONITOR = 3 +}; + +/* Define port information structure. */ +struct hv_port_info { + enum hv_port_type port_type; + u32 padding; + union { + struct { + u32 target_sint; + u32 target_vp; + u64 rsvdz; + } message_port_info; + struct { + u32 target_sint; + u32 target_vp; + u16 base_flag_bumber; + u16 flag_count; + u32 rsvdz; + } event_port_info; + struct { + u64 monitor_address; + u64 rsvdz; + } monitor_port_info; + }; +}; + +struct hv_connection_info { + enum hv_port_type port_type; + u32 padding; + union { + struct { + u64 rsvdz; + } message_connection_info; + struct { + u64 rsvdz; + } event_connection_info; + struct { + u64 monitor_address; + } monitor_connection_info; + }; +}; + +/* Define synthetic interrupt controller message flags. 
*/ +union hv_message_flags { + u8 asu8; + struct { + u8 msg_pending:1; + u8 reserved:7; + }; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + enum hv_message_type message_type; + u8 payload_size; + union hv_message_flags message_flags; + u8 reserved[2]; + union { + u64 sender; + union hv_port_id port; + }; +}; + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + u32 timer_index; + u32 reserved; + u64 expiration_time; /* When the timer expired */ + u64 delivery_time; /* When the message was delivered */ +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u ; +}; + +/* Define the number of message buffers associated with each port. */ +#define HV_PORT_MESSAGE_BUFFER_COUNT (16) + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + +/* Define the synthetic interrupt controller event flags format. */ +union hv_synic_event_flags { + u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; + u32 flags32[HV_EVENT_FLAGS_DWORD_COUNT]; +}; + +/* Define the synthetic interrupt flags page layout. */ +struct hv_synic_event_flags_page { + union hv_synic_event_flags sintevent_flags[HV_SYNIC_SINT_COUNT]; +}; + +/* Define SynIC control register. */ +union hv_synic_scontrol { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:63; + }; +}; + +/* Define synthetic interrupt source. 
*/ +union hv_synic_sint { + u64 as_uint64; + struct { + u64 vector:8; + u64 reserved1:8; + u64 masked:1; + u64 auto_eoi:1; + u64 reserved2:46; + }; +}; + +/* Define the format of the SIMP register */ +union hv_synic_simp { + u64 as_uint64; + struct { + u64 simp_enabled:1; + u64 preserved:11; + u64 base_simp_gpa:52; + }; +}; + +/* Define the format of the SIEFP register */ +union hv_synic_siefp { + u64 as_uint64; + struct { + u64 siefp_enabled:1; + u64 preserved:11; + u64 base_siefp_gpa:52; + }; +}; + +/* Definitions for the monitored notification facility */ +union hv_monitor_trigger_group { + u64 as_uint64; + struct { + u32 pending; + u32 armed; + }; +}; + +struct hv_monitor_parameter { + union hv_connection_id connectionid; + u16 flagnumber; + u16 rsvdz; +}; + +union hv_monitor_trigger_state { + u32 asu32; + + struct { + u32 group_enable:4; + u32 rsvdz:28; + }; +}; + +/* struct hv_monitor_page Layout */ +/* ------------------------------------------------------ */ +/* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */ +/* | 8 | TriggerGroup[0] | */ +/* | 10 | TriggerGroup[1] | */ +/* | 18 | TriggerGroup[2] | */ +/* | 20 | TriggerGroup[3] | */ +/* | 28 | Rsvd2[0] | */ +/* | 30 | Rsvd2[1] | */ +/* | 38 | Rsvd2[2] | */ +/* | 40 | NextCheckTime[0][0] | NextCheckTime[0][1] | */ +/* | ... | */ +/* | 240 | Latency[0][0..3] | */ +/* | 340 | Rsvz3[0] | */ +/* | 440 | Parameter[0][0] | */ +/* | 448 | Parameter[0][1] | */ +/* | ... | */ +/* | 840 | Rsvd4[0] | */ +/* ------------------------------------------------------ */ +struct hv_monitor_page { + union hv_monitor_trigger_state trigger_state; + u32 rsvdz1; + + union hv_monitor_trigger_group trigger_group[4]; + u64 rsvdz2[3]; + + s32 next_checktime[4][32]; + + u16 latency[4][32]; + u64 rsvdz3[32]; + + struct hv_monitor_parameter parameter[4][32]; + + u8 rsvdz4[1984]; +}; + +/* Declare the various hypercall operations. 
*/ +enum hv_call_code { + HVCALL_POST_MESSAGE = 0x005c, + HVCALL_SIGNAL_EVENT = 0x005d, +}; + +/* Definition of the hv_post_message hypercall input structure. */ +struct hv_input_post_message { + union hv_connection_id connectionid; + u32 reserved; + enum hv_message_type message_type; + u32 payload_size; + u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; +}; + +/* Definition of the hv_signal_event hypercall input structure. */ +struct hv_input_signal_event { + union hv_connection_id connectionid; + u16 flag_number; + u16 rsvdz; +}; + +/* + * Versioning definitions used for guests reporting themselves to the + * hypervisor, and visa versa. + */ + +/* Version info reported by guest OS's */ +enum hv_guest_os_vendor { + HVGUESTOS_VENDOR_MICROSOFT = 0x0001 +}; + +enum hv_guest_os_microsoft_ids { + HVGUESTOS_MICROSOFT_UNDEFINED = 0x00, + HVGUESTOS_MICROSOFT_MSDOS = 0x01, + HVGUESTOS_MICROSOFT_WINDOWS3X = 0x02, + HVGUESTOS_MICROSOFT_WINDOWS9X = 0x03, + HVGUESTOS_MICROSOFT_WINDOWSNT = 0x04, + HVGUESTOS_MICROSOFT_WINDOWSCE = 0x05 +}; + +/* + * Declare the MSR used to identify the guest OS. + */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 + +union hv_x64_msr_guest_os_id_contents { + u64 as_uint64; + struct { + u64 build_number:16; + u64 service_version:8; /* Service Pack, etc. */ + u64 minor_version:8; + u64 major_version:8; + u64 os_id:8; /* enum hv_guest_os_microsoft_ids (if Vendor=MS) */ + u64 vendor_id:16; /* enum hv_guest_os_vendor */ + }; +}; + +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. 
+ */ +#define HV_X64_MSR_HYPERCALL 0x40000001 + +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + +/* Fixed VMBus connection/port ids, and the SINT slot index used into the per-CPU SynIC message and event pages */ +enum { + VMBUS_MESSAGE_CONNECTION_ID = 1, + VMBUS_MESSAGE_PORT_ID = 1, + VMBUS_EVENT_CONNECTION_ID = 2, + VMBUS_EVENT_PORT_ID = 2, + VMBUS_MONITOR_CONNECTION_ID = 3, + VMBUS_MONITOR_PORT_ID = 3, + VMBUS_MESSAGE_SINT = 2, +}; + +/* Miscellaneous constants */ + +#define HV_PRESENT_BIT 0x80000000 + +#define HV_LINUX_GUEST_ID_LO 0x00000000 +#define HV_LINUX_GUEST_ID_HI 0xB16B00B5 +#define HV_LINUX_GUEST_ID (((u64)HV_LINUX_GUEST_ID_HI << 32) | \ + HV_LINUX_GUEST_ID_LO) + +#define HV_CPU_POWER_MANAGEMENT (1 << 0) +#define HV_RECOMMENDATIONS_MAX 4 + +#define HV_X64_MAX 5 +#define HV_CAPS_MAX 8 + + +#define HV_HYPERCALL_PARAM_ALIGN sizeof(u64) + + +/* Service definitions */ + +#define HV_SERVICE_PARENT_PORT (0) +#define HV_SERVICE_PARENT_CONNECTION (0) + +#define HV_SERVICE_CONNECT_RESPONSE_SUCCESS (0) +#define HV_SERVICE_CONNECT_RESPONSE_INVALID_PARAMETER (1) +#define HV_SERVICE_CONNECT_RESPONSE_UNKNOWN_SERVICE (2) +#define HV_SERVICE_CONNECT_RESPONSE_CONNECTION_REJECTED (3) + +#define HV_SERVICE_CONNECT_REQUEST_MESSAGE_ID (1) +#define HV_SERVICE_CONNECT_RESPONSE_MESSAGE_ID (2) +#define HV_SERVICE_DISCONNECT_REQUEST_MESSAGE_ID (3) +#define HV_SERVICE_DISCONNECT_RESPONSE_MESSAGE_ID (4) +#define HV_SERVICE_MAX_MESSAGE_ID (4) + +#define HV_SERVICE_PROTOCOL_VERSION (0x0010) +#define HV_CONNECT_PAYLOAD_BYTE_COUNT 64 + +/* #define VMBUS_REVISION_NUMBER 6 */ + +/* Our local vmbus's port and connection id. 
Anything >0 is fine */ +/* #define VMBUS_PORT_ID 11 */ + +/* 628180B8-308D-4c5e-B7DB-1BEB62E62EF4 */ +static const uuid_le VMBUS_SERVICE_ID = { + .b = { + 0xb8, 0x80, 0x81, 0x62, 0x8d, 0x30, 0x5e, 0x4c, + 0xb7, 0xdb, 0x1b, 0xeb, 0x62, 0xe6, 0x2e, 0xf4 + }, +}; + +#define MAX_NUM_CPUS 32 + + +struct hv_input_signal_event_buffer { + u64 align8; + struct hv_input_signal_event event; +}; + +struct hv_context { + /* We only support running on top of Hyper-V + * So at this point this really can only contain the Hyper-V ID + */ + u64 guestid; + + void *hypercall_page; + + bool synic_initialized; + + /* + * This is used as an input param to HvCallSignalEvent hypercall. The + * input param is immutable in our usage and must be dynamic mem (vs + * stack or global). */ + struct hv_input_signal_event_buffer *signal_event_buffer; + /* 8-byte aligned pointer into the buffer above */ + struct hv_input_signal_event *signal_event_param; + + void *synic_message_page[MAX_NUM_CPUS]; /* indexed by CPU number */ + void *synic_event_page[MAX_NUM_CPUS]; /* indexed by CPU number */ +}; + +extern struct hv_context hv_context; + + +/* Hv Interface */ + +extern int hv_init(void); + +extern void hv_cleanup(void); + +extern u16 hv_post_message(union hv_connection_id connection_id, + enum hv_message_type message_type, + void *payload, size_t payload_size); + +extern u16 hv_signal_event(void); + +extern void hv_synic_init(void *irqarg); + +extern void hv_synic_cleanup(void *arg); + + +/* Ring buffer interface */ + + +int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer, + u32 buflen); + +void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); + +int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, + struct scatterlist *sglist, + u32 sgcount); + +int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, + u32 buflen); + +int hv_ringbuffer_read(struct hv_ring_buffer_info *ring_info, + void *buffer, + u32 buflen, + u32 offset); + +u32 hv_get_ringbuffer_interrupt_mask(struct hv_ring_buffer_info *ring_info); + +void 
hv_dump_ring_info(struct hv_ring_buffer_info *ring_info, char *prefix); + +void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); + +/* + * Maximum channels is determined by the size of the interrupt page + * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt + * and the other is receive endpoint interrupt + */ +#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16384 channels with a 4096-byte PAGE_SIZE */ + +/* The value here must be a multiple of 32 */ +/* TODO: Need to make this configurable */ +#define MAX_NUM_CHANNELS_SUPPORTED 256 + + +enum vmbus_connect_state { + DISCONNECTED, + CONNECTING, + CONNECTED, + DISCONNECTING +}; + +#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT + +struct vmbus_connection { + enum vmbus_connect_state conn_state; + + atomic_t next_gpadl_handle; + + /* + * Represents channel interrupts. Each bit position represents a + * channel. When a channel sends an interrupt via VMBUS, it finds its + * bit in the sendInterruptPage, sets it and calls Hv to generate a port + * event. 
The other end receives the port event and parses the + * recvInterruptPage to see which bit is set + */ + void *int_page; + void *send_int_page; + void *recv_int_page; + + /* + * 2 pages - 1st page for parent->child notification and 2nd + * is child->parent notification + */ + void *monitor_pages; + struct list_head chn_msg_list; + spinlock_t channelmsg_lock; + + /* List of channels */ + struct list_head chn_list; + spinlock_t channel_lock; + + struct workqueue_struct *work_queue; +}; + + +struct vmbus_msginfo { + /* Bookkeeping stuff */ + struct list_head msglist_entry; + + /* The message itself (variable length, stored inline after the header) */ + unsigned char msg[0]; +}; + + +extern struct vmbus_connection vmbus_connection; + +/* General vmbus interface */ + +struct hv_device *vmbus_device_create(uuid_le *type, + uuid_le *instance, + struct vmbus_channel *channel); + +int vmbus_device_register(struct hv_device *child_device_obj); +void vmbus_device_unregister(struct hv_device *device_obj); + +/* static void */ +/* VmbusChildDeviceDestroy( */ +/* struct hv_device *); */ + +struct vmbus_channel *relid2channel(u32 relid); + + +/* Connection interface */ + +int vmbus_connect(void); + +int vmbus_post_msg(void *buffer, size_t buflen); + +int vmbus_set_event(u32 child_relid); + +void vmbus_on_event(unsigned long data); + + +#endif /* _HYPERV_VMBUS_H */ diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c new file mode 100644 index 000000000000..f594ed09d7e0 --- /dev/null +++ b/drivers/hv/ring_buffer.c @@ -0,0 +1,527 @@ +/* + * + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * K. Y. Srinivasan <kys@microsoft.com> + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + + +/* #defines */ + + +/* + * Amount of space available to write to, given read index r, write index w + * and data-area size z. An empty ring (w == r) yields z. The expansion is + * fully parenthesized so that it is safe inside a larger expression. + */ +#define BYTES_AVAIL_TO_WRITE(r, w, z) \ + (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w))) + + +/* + * + * hv_get_ringbuffer_availbytes() + * + * Get number of bytes available to read and to write to + * for the specified ring buffer. The two results always add up to + * ring_datasize. + */ +static inline void +hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, + u32 *read, u32 *write) +{ + u32 read_loc, write_loc; + + smp_read_barrier_depends(); + + /* Capture the read/write indices before they change */ + read_loc = rbi->ring_buffer->read_index; + write_loc = rbi->ring_buffer->write_index; + + *write = BYTES_AVAIL_TO_WRITE(read_loc, write_loc, rbi->ring_datasize); + *read = rbi->ring_datasize - *write; +} + +/* + * hv_get_next_write_location() + * + * Get the next write location for the specified ring buffer + * + */ +static inline u32 +hv_get_next_write_location(struct hv_ring_buffer_info *ring_info) +{ + u32 next = ring_info->ring_buffer->write_index; + + return next; +} + +/* + * hv_set_next_write_location() + * + * Set the next write location for the specified ring buffer + * + */ +static inline void +hv_set_next_write_location(struct hv_ring_buffer_info *ring_info, + u32 next_write_location) +{ + ring_info->ring_buffer->write_index = next_write_location; +} + +/* + * hv_get_next_read_location() + * + * Get the next read location for the specified ring buffer + */ +static 
inline u32 +hv_get_next_read_location(struct hv_ring_buffer_info *ring_info) +{ + u32 next = ring_info->ring_buffer->read_index; + + return next; +} + +/* + * hv_get_next_readlocation_withoffset() + * + * Get the next read location + offset for the specified ring buffer, + * wrapped to the size of the data area. + * This allows the caller to skip the first offset bytes. + */ +static inline u32 +hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info, + u32 offset) +{ + u32 next = ring_info->ring_buffer->read_index; + + next += offset; + next %= ring_info->ring_datasize; + + return next; +} + +/* + * + * hv_set_next_read_location() + * + * Set the next read location for the specified ring buffer + * + */ +static inline void +hv_set_next_read_location(struct hv_ring_buffer_info *ring_info, + u32 next_read_location) +{ + ring_info->ring_buffer->read_index = next_read_location; +} + + +/* + * + * hv_get_ring_buffer() + * + * Get the start of the ring buffer data area + */ +static inline void * +hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) +{ + return (void *)ring_info->ring_buffer->buffer; +} + + +/* + * + * hv_get_ring_buffersize() + * + * Get the size of the ring buffer data area (ring_datasize) + */ +static inline u32 +hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info) +{ + return ring_info->ring_datasize; +} + +/* + * + * hv_get_ring_bufferindices() + * + * Return the current write index in the upper 32 bits of a u64. The lower + * 32 bits are always zero; the read index is not included. + * + */ +static inline u64 +hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info) +{ + return (u64)ring_info->ring_buffer->write_index << 32; +} + + +/* + * + * hv_dump_ring_info() + * + * Dump out to console the ring buffer info + * + */ +void hv_dump_ring_info(struct hv_ring_buffer_info *ring_info, char *prefix) +{ + u32 bytes_avail_towrite; + u32 bytes_avail_toread; + + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + + DPRINT(VMBUS, + DEBUG_RING_LVL, + "%s <<ringinfo %p buffer %p avail write %u " + "avail read %u read idx %u write idx %u>>", + 
prefix, + ring_info, + ring_info->ring_buffer->buffer, + bytes_avail_towrite, + bytes_avail_toread, + ring_info->ring_buffer->read_index, + ring_info->ring_buffer->write_index); +} + + +/* + * + * hv_copyfrom_ringbuffer() + * + * Helper routine to copy destlen bytes from the ring buffer to dest, + * starting at start_read_offset. + * Assume there is enough data. Handles wrap-around in src case only!! + * Returns the offset just past the copied data, wrapped to the ring size. + * + */ +static u32 hv_copyfrom_ringbuffer( + struct hv_ring_buffer_info *ring_info, + void *dest, + u32 destlen, + u32 start_read_offset) +{ + void *ring_buffer = hv_get_ring_buffer(ring_info); + u32 ring_buffer_size = hv_get_ring_buffersize(ring_info); + + u32 frag_len; + + /* wrap-around detected at the src */ + if (destlen > ring_buffer_size - start_read_offset) { + frag_len = ring_buffer_size - start_read_offset; + + memcpy(dest, ring_buffer + start_read_offset, frag_len); + memcpy(dest + frag_len, ring_buffer, destlen - frag_len); + } else + + memcpy(dest, ring_buffer + start_read_offset, destlen); + + + start_read_offset += destlen; + start_read_offset %= ring_buffer_size; + + return start_read_offset; +} + + +/* + * + * hv_copyto_ringbuffer() + * + * Helper routine to copy srclen bytes from src to the ring buffer, + * starting at start_write_offset. + * Assume there is enough room. Handles wrap-around in dest case only!! + * Returns the offset just past the copied data, wrapped to the ring size. + * + */ +static u32 hv_copyto_ringbuffer( + struct hv_ring_buffer_info *ring_info, + u32 start_write_offset, + void *src, + u32 srclen) +{ + void *ring_buffer = hv_get_ring_buffer(ring_info); + u32 ring_buffer_size = hv_get_ring_buffersize(ring_info); + u32 frag_len; + + /* wrap-around detected! 
*/ + if (srclen > ring_buffer_size - start_write_offset) { + frag_len = ring_buffer_size - start_write_offset; + memcpy(ring_buffer + start_write_offset, src, frag_len); + memcpy(ring_buffer, src + frag_len, srclen - frag_len); + } else + memcpy(ring_buffer + start_write_offset, src, srclen); + + start_write_offset += srclen; + start_write_offset %= ring_buffer_size; + + return start_write_offset; +} + +/* + * + * hv_ringbuffer_get_debuginfo() + * + * Get various debug metrics for the specified ring buffer. + * debug_info is left untouched when ring_info has no ring buffer attached. + * + */ +void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) +{ + u32 bytes_avail_towrite; + u32 bytes_avail_toread; + + if (ring_info->ring_buffer) { + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = + ring_info->ring_buffer->read_index; + debug_info->current_write_index = + ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask = + ring_info->ring_buffer->interrupt_mask; + } +} + + +/* + * + * hv_get_ringbuffer_interrupt_mask() + * + * Get the interrupt mask for the specified ring buffer + * + */ +u32 hv_get_ringbuffer_interrupt_mask(struct hv_ring_buffer_info *rbi) +{ + return rbi->ring_buffer->interrupt_mask; +} + +/* + * + * hv_ringbuffer_init() + * + * Initialize the ring buffer. buffer starts with the struct hv_ring_buffer + * header and buflen is its total size; the remainder is the data area. + * Returns 0, or -EINVAL if the header is not exactly PAGE_SIZE bytes. + * + */ +int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, + void *buffer, u32 buflen) +{ + if (sizeof(struct hv_ring_buffer) != PAGE_SIZE) + return -EINVAL; + + memset(ring_info, 0, sizeof(struct hv_ring_buffer_info)); + + ring_info->ring_buffer = (struct hv_ring_buffer *)buffer; + ring_info->ring_buffer->read_index = + ring_info->ring_buffer->write_index = 0; + + ring_info->ring_size = buflen; + ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer); + + 
spin_lock_init(&ring_info->ring_lock); + + return 0; +} + +/* + * + * hv_ringbuffer_cleanup() + * + * Cleanup the ring buffer (currently there is nothing to release) + * + */ +void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) +{ +} + +/* + * + * hv_ringbuffer_write() + * + * Write to the ring buffer: copies every scatterlist entry, followed by an + * 8-byte trailer, and then publishes the new write index. + * Returns 0 on success or -EAGAIN if there is not enough free space. + * + */ +int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, + struct scatterlist *sglist, u32 sgcount) +{ + int i = 0; + u32 bytes_avail_towrite; + u32 bytes_avail_toread; + u32 totalbytes_towrite = 0; + + struct scatterlist *sg; + u32 next_write_location; + u64 prev_indices = 0; + unsigned long flags; + + for_each_sg(sglist, sg, sgcount, i) + { + totalbytes_towrite += sg->length; + } + + /* Room for the trailing prev_indices value */ + totalbytes_towrite += sizeof(u64); + + spin_lock_irqsave(&outring_info->ring_lock, flags); + + hv_get_ringbuffer_availbytes(outring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + + + /* If there is only room for the packet, assume it is full. */ + /* Otherwise, the next time around, we think the ring buffer */ + /* is empty since the read index == write index */ + if (bytes_avail_towrite <= totalbytes_towrite) { + spin_unlock_irqrestore(&outring_info->ring_lock, flags); + return -EAGAIN; + } + + /* Write to the ring buffer */ + next_write_location = hv_get_next_write_location(outring_info); + + for_each_sg(sglist, sg, sgcount, i) + { + next_write_location = hv_copyto_ringbuffer(outring_info, + next_write_location, + sg_virt(sg), + sg->length); + } + + /* Set previous packet start */ + prev_indices = hv_get_ring_bufferindices(outring_info); + + next_write_location = hv_copyto_ringbuffer(outring_info, + next_write_location, + &prev_indices, + sizeof(u64)); + + /* Make sure we flush all writes before updating the writeIndex */ + smp_wmb(); + + /* Now, update the write location */ + hv_set_next_write_location(outring_info, next_write_location); + + + spin_unlock_irqrestore(&outring_info->ring_lock, flags); + return 0; +} + + +/* + * + * hv_ringbuffer_peek() + * + * Read without advancing 
the read index + * + */ +int hv_ringbuffer_peek(struct hv_ring_buffer_info *Inring_info, + void *Buffer, u32 buflen) +{ + u32 bytes_avail_towrite; + u32 bytes_avail_toread; + u32 next_read_location = 0; + unsigned long flags; + + spin_lock_irqsave(&Inring_info->ring_lock, flags); + + hv_get_ringbuffer_availbytes(Inring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + + /* Make sure there is enough data to satisfy the request */ + if (bytes_avail_toread < buflen) { + + spin_unlock_irqrestore(&Inring_info->ring_lock, flags); + + return -EAGAIN; /* fewer than buflen bytes are readable */ + } + + /* Start copying at the current read index */ + next_read_location = hv_get_next_read_location(Inring_info); + + /* The returned offset is not stored back: the read index stays put */ + next_read_location = hv_copyfrom_ringbuffer(Inring_info, + Buffer, + buflen, + next_read_location); + + spin_unlock_irqrestore(&Inring_info->ring_lock, flags); + + return 0; +} + + +/* + * + * hv_ringbuffer_read() + * + * Read and advance the read index. + * + * Skips offset bytes, copies buflen bytes into buffer, then consumes the + * 8 bytes that follow them before publishing the new read index. + * Returns 0 on success, -EINVAL for a zero buflen, or -EAGAIN when fewer + * than buflen bytes are available. + * + */ +int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, + u32 buflen, u32 offset) +{ + u32 bytes_avail_towrite; + u32 bytes_avail_toread; + u32 next_read_location = 0; + u64 prev_indices = 0; + unsigned long flags; + + if (buflen <= 0) + return -EINVAL; + + spin_lock_irqsave(&inring_info->ring_lock, flags); + + hv_get_ringbuffer_availbytes(inring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + + /* Make sure there is enough data to satisfy the request */ + if (bytes_avail_toread < buflen) { + spin_unlock_irqrestore(&inring_info->ring_lock, flags); + + return -EAGAIN; + } + + next_read_location = + hv_get_next_readlocation_withoffset(inring_info, offset); + + next_read_location = hv_copyfrom_ringbuffer(inring_info, + buffer, + buflen, + next_read_location); + + /* Consume the 8-byte value that hv_ringbuffer_write() appends */ + next_read_location = hv_copyfrom_ringbuffer(inring_info, + &prev_indices, + sizeof(u64), + next_read_location); + + /* Make sure all reads are done before we update the read index since */ + /* the writer may start writing to the read area once the read index */ + /* is updated */ + smp_mb(); + + /* Update 
the read index */ + hv_set_next_read_location(inring_info, next_read_location); + + spin_unlock_irqrestore(&inring_info->ring_lock, flags); + + return 0; +} diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c new file mode 100644 index 000000000000..b0d08f980de1 --- /dev/null +++ b/drivers/hv/vmbus_drv.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang <haiyangz@microsoft.com> + * Hank Janssen <hjanssen@microsoft.com> + * K. Y. 
Srinivasan <kys@microsoft.com> + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/sysctl.h> +#include <linux/slab.h> +#include <linux/acpi.h> +#include <acpi/acpi_bus.h> +#include <linux/completion.h> +#include <linux/hyperv.h> + +#include "hyperv_vmbus.h" + + +static struct acpi_device *hv_acpi_dev; + +static struct tasklet_struct msg_dpc; +static struct tasklet_struct event_dpc; + +unsigned int vmbus_loglevel = (ALL_MODULES << 16 | INFO_LVL); +EXPORT_SYMBOL(vmbus_loglevel); + +static struct completion probe_event; +static int irq; + +/* + * get_channel_info - copy the debug information of a device's channel into + * a hv_device_info. @info is left untouched when the device has no channel. + */ +static void get_channel_info(struct hv_device *device, + struct hv_device_info *info) +{ + struct vmbus_channel_debug_info debug_info; + + if (!device->channel) + return; + + vmbus_get_debug_info(device->channel, &debug_info); + + info->chn_id = debug_info.relid; + info->chn_state = debug_info.state; + memcpy(&info->chn_type, &debug_info.interfacetype, + sizeof(uuid_le)); + memcpy(&info->chn_instance, &debug_info.interface_instance, + sizeof(uuid_le)); + + info->monitor_id = debug_info.monitorid; + + info->server_monitor_pending = debug_info.servermonitor_pending; + info->server_monitor_latency = debug_info.servermonitor_latency; + info->server_monitor_conn_id = debug_info.servermonitor_connectionid; + + info->client_monitor_pending = debug_info.clientmonitor_pending; + info->client_monitor_latency = debug_info.clientmonitor_latency; + info->client_monitor_conn_id = debug_info.clientmonitor_connectionid; + + info->inbound.int_mask = debug_info.inbound.current_interrupt_mask; + info->inbound.read_idx = debug_info.inbound.current_read_index; + info->inbound.write_idx = debug_info.inbound.current_write_index; + info->inbound.bytes_avail_toread = + debug_info.inbound.bytes_avail_toread; + info->inbound.bytes_avail_towrite = + debug_info.inbound.bytes_avail_towrite; + + 
info->outbound.int_mask = + debug_info.outbound.current_interrupt_mask; + info->outbound.read_idx = debug_info.outbound.current_read_index; + info->outbound.write_idx = debug_info.outbound.current_write_index; + info->outbound.bytes_avail_toread = + debug_info.outbound.bytes_avail_toread; + info->outbound.bytes_avail_towrite = + debug_info.outbound.bytes_avail_towrite; +} + +/* Number of hex characters needed to print a device type guid (2 per byte) */ +#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2) + +/* + * print_alias_name - format the device type guid as lower-case hex. + * alias_name must have room for VMBUS_ALIAS_LEN + 1 bytes: the final + * sprintf() also writes the terminating NUL. + */ +static void print_alias_name(struct hv_device *hv_dev, char *alias_name) +{ + int i; + for (i = 0; i < VMBUS_ALIAS_LEN; i += 2) + sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]); +} + +/* + * vmbus_show_device_attr - Show the device attribute in sysfs. + * + * This is invoked when user does a + * "cat /sys/bus/vmbus/devices/<busdevice>/<attr name>" + * + * The attribute is selected by comparing dev_attr->attr.name against the + * known names. Returns the number of bytes written to buf, 0 if the name + * is not recognised, or -ENOMEM if the temporary hv_device_info cannot be + * allocated. + */ +static ssize_t vmbus_show_device_attr(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_device_info *device_info; + char alias_name[VMBUS_ALIAS_LEN + 1]; + int ret = 0; + + device_info = kzalloc(sizeof(struct hv_device_info), GFP_KERNEL); + if (!device_info) + return -ENOMEM; /* report the failure instead of an empty read */ + + get_channel_info(hv_dev, device_info); + + if (!strcmp(dev_attr->attr.name, "class_id")) { + ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" + "%02x%02x%02x%02x%02x%02x%02x%02x}\n", + device_info->chn_type.b[3], + device_info->chn_type.b[2], + device_info->chn_type.b[1], + device_info->chn_type.b[0], + device_info->chn_type.b[5], + device_info->chn_type.b[4], + device_info->chn_type.b[7], + device_info->chn_type.b[6], + device_info->chn_type.b[8], + device_info->chn_type.b[9], + device_info->chn_type.b[10], + device_info->chn_type.b[11], + device_info->chn_type.b[12], + device_info->chn_type.b[13], + device_info->chn_type.b[14], + device_info->chn_type.b[15]); + } else if (!strcmp(dev_attr->attr.name, "device_id")) { + ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" + 
"%02x%02x%02x%02x%02x%02x%02x%02x}\n", + device_info->chn_instance.b[3], + device_info->chn_instance.b[2], + device_info->chn_instance.b[1], + device_info->chn_instance.b[0], + device_info->chn_instance.b[5], + device_info->chn_instance.b[4], + device_info->chn_instance.b[7], + device_info->chn_instance.b[6], + device_info->chn_instance.b[8], + device_info->chn_instance.b[9], + device_info->chn_instance.b[10], + device_info->chn_instance.b[11], + device_info->chn_instance.b[12], + device_info->chn_instance.b[13], + device_info->chn_instance.b[14], + device_info->chn_instance.b[15]); + } else if (!strcmp(dev_attr->attr.name, "modalias")) { + print_alias_name(hv_dev, alias_name); + ret = sprintf(buf, "vmbus:%s\n", alias_name); + } else if (!strcmp(dev_attr->attr.name, "state")) { + ret = sprintf(buf, "%d\n", device_info->chn_state); + } else if (!strcmp(dev_attr->attr.name, "id")) { + ret = sprintf(buf, "%d\n", device_info->chn_id); + } else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) { + ret = sprintf(buf, "%d\n", device_info->outbound.int_mask); + } else if (!strcmp(dev_attr->attr.name, "out_read_index")) { + ret = sprintf(buf, "%d\n", device_info->outbound.read_idx); + } else if (!strcmp(dev_attr->attr.name, "out_write_index")) { + ret = sprintf(buf, "%d\n", device_info->outbound.write_idx); + } else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->outbound.bytes_avail_toread); + } else if (!strcmp(dev_attr->attr.name, "out_write_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->outbound.bytes_avail_towrite); + } else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) { + ret = sprintf(buf, "%d\n", device_info->inbound.int_mask); + } else if (!strcmp(dev_attr->attr.name, "in_read_index")) { + ret = sprintf(buf, "%d\n", device_info->inbound.read_idx); + } else if (!strcmp(dev_attr->attr.name, "in_write_index")) { + ret = sprintf(buf, "%d\n", device_info->inbound.write_idx); + } else if 
(!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->inbound.bytes_avail_toread); + } else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->inbound.bytes_avail_towrite); + } else if (!strcmp(dev_attr->attr.name, "monitor_id")) { + ret = sprintf(buf, "%d\n", device_info->monitor_id); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) { + ret = sprintf(buf, "%d\n", device_info->server_monitor_pending); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) { + ret = sprintf(buf, "%d\n", device_info->server_monitor_latency); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) { + ret = sprintf(buf, "%d\n", + device_info->server_monitor_conn_id); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) { + ret = sprintf(buf, "%d\n", device_info->client_monitor_pending); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) { + ret = sprintf(buf, "%d\n", device_info->client_monitor_latency); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) { + ret = sprintf(buf, "%d\n", + device_info->client_monitor_conn_id); + } + + kfree(device_info); + return ret; +} + +/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ +static struct device_attribute vmbus_device_attrs[] = { + __ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(modalias, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), + + 
__ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(in_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_read_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_write_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR_NULL +}; + + +/* + * vmbus_uevent - add uevent for our device + * + * This routine is invoked when a device is added or removed on the vmbus to + * generate a uevent to udev in the userspace. The udev will then look at its + * rule and the uevent generated here to load the appropriate driver + * + * The alias string will be of the form vmbus:guid where guid is the string + * representation of the device guid (each byte of the guid will be + * represented with two hex characters). + */ +static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) +{ + struct hv_device *dev = device_to_hv_device(device); + int ret; + char alias_name[VMBUS_ALIAS_LEN + 1]; + + print_alias_name(dev, alias_name); + ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name); + return ret; +} + +/* All-zero guid: static storage, never written in the code shown here */ +static uuid_le null_guid; + +/* Return true if guid is all zeroes, false otherwise */ +static inline bool is_null_guid(const __u8 *guid) +{ + if (memcmp(guid, &null_guid, sizeof(uuid_le))) + return false; + return true; +} + +/* + * Return a matching hv_vmbus_device_id pointer. + * If there is no match, return NULL. 
+ */ +static const struct hv_vmbus_device_id *hv_vmbus_get_id( + const struct hv_vmbus_device_id *id, + __u8 *guid) +{ + /* The table is terminated by an entry whose guid is all zeroes */ + for (; !is_null_guid(id->guid); id++) + if (!memcmp(&id->guid, guid, sizeof(uuid_le))) + return id; + + return NULL; +} + + + +/* + * vmbus_match - Attempt to match the specified device to the specified driver + * + * Returns 1 if the device type guid is listed in the driver's id_table, + * 0 otherwise. + */ +static int vmbus_match(struct device *device, struct device_driver *driver) +{ + struct hv_driver *drv = drv_to_hv_drv(driver); + struct hv_device *hv_dev = device_to_hv_device(device); + + if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) + return 1; + + return 0; +} + +/* + * vmbus_probe - Add the new vmbus's child device + * + * Returns the result of the driver's probe callback, or -ENODEV if the + * driver does not provide one. + */ +static int vmbus_probe(struct device *child_device) +{ + int ret = 0; + struct hv_driver *drv = + drv_to_hv_drv(child_device->driver); + struct hv_device *dev = device_to_hv_device(child_device); + const struct hv_vmbus_device_id *dev_id; + + dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); + if (drv->probe) { + ret = drv->probe(dev, dev_id); + if (ret != 0) + pr_err("probe failed for device %s (%d)\n", + dev_name(child_device), ret); + + } else { + pr_err("probe not set for driver %s\n", + dev_name(child_device)); + ret = -ENODEV; + } + return ret; +} + +/* + * vmbus_remove - Remove a vmbus device + * + * Always returns 0; a missing remove callback is only logged. + */ +static int vmbus_remove(struct device *child_device) +{ + struct hv_driver *drv = drv_to_hv_drv(child_device->driver); + struct hv_device *dev = device_to_hv_device(child_device); + + if (drv->remove) + drv->remove(dev); + else + pr_err("remove not set for driver %s\n", + dev_name(child_device)); + + return 0; +} + + +/* + * vmbus_shutdown - Shutdown a vmbus device + */ +static void vmbus_shutdown(struct device *child_device) +{ + struct hv_driver *drv; + struct hv_device *dev = device_to_hv_device(child_device); + + + /* The device may not be attached yet */ + if (!child_device->driver) + return; + + drv = drv_to_hv_drv(child_device->driver); + + if (drv->shutdown) + drv->shutdown(dev); + + 
return; +} + + +/* + * vmbus_device_release - Final callback release of the vmbus child device + */ +static void vmbus_device_release(struct device *device) +{ + struct hv_device *hv_dev = device_to_hv_device(device); + + kfree(hv_dev); + +} + +/* The one and only one */ +static struct bus_type hv_bus = { + .name = "vmbus", + .match = vmbus_match, + .shutdown = vmbus_shutdown, + .remove = vmbus_remove, + .probe = vmbus_probe, + .uevent = vmbus_uevent, + .dev_attrs = vmbus_device_attrs, +}; + +static const char *driver_name = "hyperv"; + + +/* A private copy of one hypervisor message plus the work item that handles it */ +struct onmessage_work_context { + struct work_struct work; + struct hv_message msg; +}; + +/* Work handler: process the copied message, then free the context */ +static void vmbus_onmessage_work(struct work_struct *work) +{ + struct onmessage_work_context *ctx; + + ctx = container_of(work, struct onmessage_work_context, + work); + vmbus_onmessage(&ctx->msg); + kfree(ctx); +} + +/* + * vmbus_on_msg_dpc - tasklet that drains the VMBUS_MESSAGE_SINT slot of the + * current CPU's SynIC message page. + * + * Each message is copied into a freshly allocated work context and queued + * on vmbus_connection.work_queue; the slot is then marked empty and, if the + * hypervisor flagged more messages as pending, an EOM is written so that + * the next one can be delivered. + */ +static void vmbus_on_msg_dpc(unsigned long data) +{ + int cpu = smp_processor_id(); + void *page_addr = hv_context.synic_message_page[cpu]; + struct hv_message *msg = (struct hv_message *)page_addr + + VMBUS_MESSAGE_SINT; + struct onmessage_work_context *ctx; + + while (1) { + if (msg->header.message_type == HVMSG_NONE) { + /* no msg */ + break; + } else { + ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); + if (ctx == NULL) { + /* + * Out of memory. Leave the message in its + * slot and stop: retrying here would spin + * forever in the tasklet, because the slot + * is never cleared. vmbus_isr() schedules + * this tasklet again on a later interrupt + * if the slot is still non-empty. + */ + break; + } + INIT_WORK(&ctx->work, vmbus_onmessage_work); + memcpy(&ctx->msg, msg, sizeof(*msg)); + queue_work(vmbus_connection.work_queue, &ctx->work); + } + + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. 
Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + smp_mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } + } +} + +/* + * vmbus_isr - interrupt handler for the vmbus irq + * + * Schedules event_dpc and/or msg_dpc when the current CPU's SynIC event or + * message page has work pending. Returns IRQ_HANDLED if either tasklet was + * scheduled, IRQ_NONE otherwise. + */ +static irqreturn_t vmbus_isr(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + void *page_addr; + struct hv_message *msg; + union hv_synic_event_flags *event; + bool handled = false; + + /* + * Check for events before checking for messages. This is the order + * in which events and messages are checked in Windows guests on + * Hyper-V, and the Windows team suggested we do the same. + */ + + page_addr = hv_context.synic_event_page[cpu]; + event = (union hv_synic_event_flags *)page_addr + VMBUS_MESSAGE_SINT; + + /* Since we are a child, we only need to check bit 0 */ + if (sync_test_and_clear_bit(0, (unsigned long *) &event->flags32[0])) { + handled = true; + tasklet_schedule(&event_dpc); + } + + page_addr = hv_context.synic_message_page[cpu]; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; + + /* Check if there are actual msgs to be processed */ + if (msg->header.message_type != HVMSG_NONE) { + handled = true; + tasklet_schedule(&msg_dpc); + } + + if (handled) + return IRQ_HANDLED; + else + return IRQ_NONE; +} + +/* + * vmbus_bus_init - Main vmbus driver initialization routine. 
+ * + * Here, we + * - initialize the vmbus driver context + * - invoke the vmbus hv main init routine + * - get the irq resource + * - retrieve the channel offers + */ +static int vmbus_bus_init(int irq) +{ + int ret; + unsigned int vector; + + /* Hypervisor initialization...setup hypercall page..etc */ + ret = hv_init(); + if (ret != 0) { + pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); + return ret; + } + + tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0); + tasklet_init(&event_dpc, vmbus_on_event, 0); + + ret = bus_register(&hv_bus); + if (ret) + goto err_cleanup; + + ret = request_irq(irq, vmbus_isr, IRQF_SAMPLE_RANDOM, + driver_name, hv_acpi_dev); + + if (ret != 0) { + pr_err("Unable to request IRQ %d\n", + irq); + goto err_unregister; + } + + vector = IRQ0_VECTOR + irq; + + /* + * Notify the hypervisor of our irq and + * connect to the host. + */ + on_each_cpu(hv_synic_init, (void *)&vector, 1); + ret = vmbus_connect(); + if (ret) + goto err_irq; + + vmbus_request_offers(); + + return 0; + +err_irq: + free_irq(irq, hv_acpi_dev); + +err_unregister: + bus_unregister(&hv_bus); + +err_cleanup: + hv_cleanup(); + + return ret; +} + +/** + * __vmbus_child_driver_register - Register a vmbus's driver + * @drv: Pointer to driver structure you want to register + * @owner: owner module of the drv + * @mod_name: module name string + * + * Registers the given driver with Linux through the 'driver_register()' call + * and sets up the hyper-v vmbus handling for this driver. + * It will return the state of the 'driver_register()' call. 
+ * + */ +int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) +{ + int ret; + + pr_info("registering driver %s\n", hv_driver->name); + + hv_driver->driver.name = hv_driver->name; + hv_driver->driver.owner = owner; + hv_driver->driver.mod_name = mod_name; + hv_driver->driver.bus = &hv_bus; + + ret = driver_register(&hv_driver->driver); + + vmbus_request_offers(); + + return ret; +} +EXPORT_SYMBOL_GPL(__vmbus_driver_register); + +/** + * vmbus_driver_unregister() - Unregister a vmbus's driver + * @drv: Pointer to driver structure you want to un-register + * + * Un-register the given driver that was previous registered with a call to + * vmbus_driver_register() + */ +void vmbus_driver_unregister(struct hv_driver *hv_driver) +{ + pr_info("unregistering driver %s\n", hv_driver->name); + + driver_unregister(&hv_driver->driver); + +} +EXPORT_SYMBOL_GPL(vmbus_driver_unregister); + +/* + * vmbus_device_create - Creates and registers a new child device + * on the vmbus. 
+ */ +struct hv_device *vmbus_device_create(uuid_le *type, + uuid_le *instance, + struct vmbus_channel *channel) +{ + struct hv_device *child_device_obj; + + child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); + if (!child_device_obj) { + pr_err("Unable to allocate device object for child device\n"); + return NULL; + } + + child_device_obj->channel = channel; + memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); + memcpy(&child_device_obj->dev_instance, instance, + sizeof(uuid_le)); + + + return child_device_obj; +} + +/* + * vmbus_device_register - Register the child device + */ +int vmbus_device_register(struct hv_device *child_device_obj) +{ + int ret = 0; + + static atomic_t device_num = ATOMIC_INIT(0); + + dev_set_name(&child_device_obj->device, "vmbus_0_%d", + atomic_inc_return(&device_num)); + + child_device_obj->device.bus = &hv_bus; + child_device_obj->device.parent = &hv_acpi_dev->dev; + child_device_obj->device.release = vmbus_device_release; + + /* + * Register with the LDM. This will kick off the driver/device + * binding...which will eventually call vmbus_match() and vmbus_probe() + */ + ret = device_register(&child_device_obj->device); + + if (ret) + pr_err("Unable to register child device\n"); + else + pr_info("child device %s registered\n", + dev_name(&child_device_obj->device)); + + return ret; +} + +/* + * vmbus_device_unregister - Remove the specified child device + * from the vmbus. + */ +void vmbus_device_unregister(struct hv_device *device_obj) +{ + /* + * Kick off the process of unregistering the device. + * This will call vmbus_remove() and eventually vmbus_device_release() + */ + device_unregister(&device_obj->device); + + pr_info("child device %s unregistered\n", + dev_name(&device_obj->device)); +} + + +/* + * VMBUS is an acpi enumerated device. Get the the IRQ information + * from DSDT. 
+ */ + +static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *irq) +{ + + if (res->type == ACPI_RESOURCE_TYPE_IRQ) { + struct acpi_resource_irq *irqp; + irqp = &res->data.irq; + + *((unsigned int *)irq) = irqp->interrupts[0]; + } + + return AE_OK; +} + +static int vmbus_acpi_add(struct acpi_device *device) +{ + acpi_status result; + + hv_acpi_dev = device; + + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, + vmbus_walk_resources, &irq); + + if (ACPI_FAILURE(result)) { + complete(&probe_event); + return -ENODEV; + } + complete(&probe_event); + return 0; +} + +static const struct acpi_device_id vmbus_acpi_device_ids[] = { + {"VMBUS", 0}, + {"VMBus", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); + +static struct acpi_driver vmbus_acpi_driver = { + .name = "vmbus", + .ids = vmbus_acpi_device_ids, + .ops = { + .add = vmbus_acpi_add, + }, +}; + +static int __init hv_acpi_init(void) +{ + int ret, t; + + init_completion(&probe_event); + + /* + * Get irq resources first. + */ + + ret = acpi_bus_register_driver(&vmbus_acpi_driver); + + if (ret) + return ret; + + t = wait_for_completion_timeout(&probe_event, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto cleanup; + } + + if (irq <= 0) { + ret = -ENODEV; + goto cleanup; + } + + ret = vmbus_bus_init(irq); + if (ret) + goto cleanup; + + return 0; + +cleanup: + acpi_bus_unregister_driver(&vmbus_acpi_driver); + return ret; +} + + +MODULE_LICENSE("GPL"); +MODULE_VERSION(HV_DRV_VERSION); +module_param(vmbus_loglevel, int, S_IRUGO|S_IWUSR); + +module_init(hv_acpi_init); |