diff options
-rw-r--r-- | drivers/hv/channel.c | 28 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 2 | ||||
-rw-r--r-- | drivers/hv/hv_trace.h | 19 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 108 | ||||
-rw-r--r-- | include/linux/hyperv.h | 10 |
5 files changed, 163 insertions, 4 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 132e476f87b2..90070b337c10 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -290,6 +290,34 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); /* + * Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt. + * + * CHANNELMSG_MODIFYCHANNEL messages are asynchronous. Also, Hyper-V does not + * ACK such messages. IOW we can't know when the host will stop interrupting + * the "old" vCPU and start interrupting the "new" vCPU for the given channel. + * + * The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version + * VERSION_WIN10_V4_1. + */ +int vmbus_send_modifychannel(u32 child_relid, u32 target_vp) +{ + struct vmbus_channel_modifychannel conn_msg; + int ret; + + memset(&conn_msg, 0, sizeof(conn_msg)); + conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL; + conn_msg.child_relid = child_relid; + conn_msg.target_vp = target_vp; + + ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true); + + trace_vmbus_send_modifychannel(&conn_msg, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_send_modifychannel); + +/* * create_gpadl_header - Creates a gpadl for the specified buffer */ static int create_gpadl_header(void *kbuffer, u32 size, diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 2db3823f0e59..ffd7fffa5f83 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -1383,7 +1383,7 @@ channel_message_table[CHANNELMSG_COUNT] = { { CHANNELMSG_19, 0, NULL, 0}, { CHANNELMSG_20, 0, NULL, 0}, { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0}, - { CHANNELMSG_22, 0, NULL, 0}, + { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0}, { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0}, }; diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h index e70783e33680..a43bc76c2d5d 100644 --- a/drivers/hv/hv_trace.h +++ b/drivers/hv/hv_trace.h @@ -296,6 +296,25 @@ 
TRACE_EVENT(vmbus_send_tl_connect_request, ) ); +TRACE_EVENT(vmbus_send_modifychannel, + TP_PROTO(const struct vmbus_channel_modifychannel *msg, + int ret), + TP_ARGS(msg, ret), + TP_STRUCT__entry( + __field(u32, child_relid) + __field(u32, target_vp) + __field(int, ret) + ), + TP_fast_assign( + __entry->child_relid = msg->child_relid; + __entry->target_vp = msg->target_vp; + __entry->ret = ret; + ), + TP_printk("binding child_relid 0x%x to target_vp 0x%x, ret %d", + __entry->child_relid, __entry->target_vp, __entry->ret + ) + ); + DECLARE_EVENT_CLASS(vmbus_channel, TP_PROTO(const struct vmbus_channel *channel), TP_ARGS(channel), diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f7dfa507a40..5d24b25fb5aa 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1606,8 +1606,24 @@ static ssize_t vmbus_chan_attr_show(struct kobject *kobj, return attribute->show(chan, buf); } +static ssize_t vmbus_chan_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count) +{ + const struct vmbus_chan_attribute *attribute + = container_of(attr, struct vmbus_chan_attribute, attr); + struct vmbus_channel *chan + = container_of(kobj, struct vmbus_channel, kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(chan, buf, count); +} + static const struct sysfs_ops vmbus_chan_sysfs_ops = { .show = vmbus_chan_attr_show, + .store = vmbus_chan_attr_store, }; static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) @@ -1678,11 +1694,99 @@ static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) } static VMBUS_CHAN_ATTR_RO(write_avail); -static ssize_t show_target_cpu(struct vmbus_channel *channel, char *buf) +static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) { return sprintf(buf, "%u\n", channel->target_cpu); } -static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL); +static ssize_t target_cpu_store(struct vmbus_channel *channel, + const char *buf, 
size_t count) +{ + ssize_t ret = count; + u32 target_cpu; + + if (vmbus_proto_version < VERSION_WIN10_V4_1) + return -EIO; + + if (sscanf(buf, "%uu", &target_cpu) != 1) + return -EIO; + + /* Validate target_cpu for the cpumask_test_cpu() operation below. */ + if (target_cpu >= nr_cpumask_bits) + return -EINVAL; + + /* No CPUs should come up or down during this. */ + cpus_read_lock(); + + if (!cpumask_test_cpu(target_cpu, cpu_online_mask)) { + cpus_read_unlock(); + return -EINVAL; + } + + /* + * Synchronizes target_cpu_store() and channel closure: + * + * { Initially: state = CHANNEL_OPENED } + * + * CPU1 CPU2 + * + * [target_cpu_store()] [vmbus_disconnect_ring()] + * + * LOCK channel_mutex LOCK channel_mutex + * LOAD r1 = state LOAD r2 = state + * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) + * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN + * [...] SEND CLOSECHANNEL + * UNLOCK channel_mutex UNLOCK channel_mutex + * + * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes + * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND + * + * Note. The host processes the channel messages "sequentially", in + * the order in which they are received on a per-partition basis. + */ + mutex_lock(&vmbus_connection.channel_mutex); + + /* + * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; + * avoid sending the message and fail here for such channels. + */ + if (channel->state != CHANNEL_OPENED_STATE) { + ret = -EIO; + goto cpu_store_unlock; + } + + if (channel->target_cpu == target_cpu) + goto cpu_store_unlock; + + if (vmbus_send_modifychannel(channel->offermsg.child_relid, + hv_cpu_number_to_vp_number(target_cpu))) { + ret = -EIO; + goto cpu_store_unlock; + } + + /* + * Warning. At this point, there is *no* guarantee that the host will + * have successfully processed the vmbus_send_modifychannel() request. + * See the header comment of vmbus_send_modifychannel() for more info. 
+ * + * Lags in the processing of the above vmbus_send_modifychannel() can + * result in missed interrupts if the "old" target CPU is taken offline + * before Hyper-V starts sending interrupts to the "new" target CPU. + * But apart from this offlining scenario, the code tolerates such + * lags. It will function correctly even if a channel interrupt comes + * in on a CPU that is different from the channel target_cpu value. + */ + + channel->target_cpu = target_cpu; + channel->target_vp = hv_cpu_number_to_vp_number(target_cpu); + channel->numa_node = cpu_to_node(target_cpu); + +cpu_store_unlock: + mutex_unlock(&vmbus_connection.channel_mutex); + cpus_read_unlock(); + return ret; +} +static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); static ssize_t channel_pending_show(struct vmbus_channel *channel, char *buf) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 247356dbd742..b85d7580f2c1 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -425,7 +425,7 @@ enum vmbus_channel_message_type { CHANNELMSG_19 = 19, CHANNELMSG_20 = 20, CHANNELMSG_TL_CONNECT_REQUEST = 21, - CHANNELMSG_22 = 22, + CHANNELMSG_MODIFYCHANNEL = 22, CHANNELMSG_TL_CONNECT_RESULT = 23, CHANNELMSG_COUNT }; @@ -620,6 +620,13 @@ struct vmbus_channel_tl_connect_request { guid_t host_service_id; } __packed; +/* Modify Channel parameters, cf. vmbus_send_modifychannel() */ +struct vmbus_channel_modifychannel { + struct vmbus_channel_message_header header; + u32 child_relid; + u32 target_vp; +} __packed; + struct vmbus_channel_version_response { struct vmbus_channel_message_header header; u8 version_supported; @@ -1505,6 +1512,7 @@ extern __u32 vmbus_proto_version; int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); +int vmbus_send_modifychannel(u32 child_relid, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); /* Get the start of the ring buffer. */ |