summaryrefslogtreecommitdiff
path: root/drivers/hv
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-12-10 08:27:48 +0300
committerDavid S. Miller <davem@davemloft.net>2018-12-10 08:43:31 +0300
commit4cc1feeb6ffc2799f8badb4dea77c637d340cb0d (patch)
treec41c1e4c05f016298246ad7b3a6034dc1e65c154 /drivers/hv
parenta60956ed72f7b715e9918df93fcf2f63a30fdda1 (diff)
parent40e020c129cfc991e8ab4736d2665351ffd1468d (diff)
downloadlinux-4cc1feeb6ffc2799f8badb4dea77c637d340cb0d.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Several conflicts, seemingly all over the place. I used Stephen Rothwell's sample resolutions for many of these, if not just to double check my own work, so definitely the credit largely goes to him. The NFP conflict consisted of a bug fix (moving operations past the rhashtable operation) while chaning the initial argument in the function call in the moved code. The net/dsa/master.c conflict had to do with a bug fix intermixing of making dsa_master_set_mtu() static with the fixing of the tagging attribute location. cls_flower had a conflict because the dup reject fix from Or overlapped with the addition of port range classifiction. __set_phy_supported()'s conflict was relatively easy to resolve because Andrew fixed it in both trees, so it was just a matter of taking the net-next copy. Or at least I think it was :-) Joe Stringer's fix to the handling of netns id 0 in bpf_sk_lookup() intermixed with changes on how the sdif and caller_net are calculated in these code paths in net-next. The remaining BPF conflicts were largely about the addition of the __bpf_md_ptr stuff in 'net' overlapping with adjustments and additions to the relevant data structure where the MD pointer macros are used. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/hv')
-rw-r--r--drivers/hv/channel.c8
-rw-r--r--drivers/hv/channel_mgmt.c189
-rw-r--r--drivers/hv/connection.c24
-rw-r--r--drivers/hv/hyperv_vmbus.h7
4 files changed, 162 insertions, 66 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index de8193f3b838..fe00b12e4417 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -516,6 +516,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
}
wait_for_completion(&msginfo->waitevent);
+ if (msginfo->response.gpadl_created.creation_status != 0) {
+ pr_err("Failed to establish GPADL: err = 0x%x\n",
+ msginfo->response.gpadl_created.creation_status);
+
+ ret = -EDQUOT;
+ goto cleanup;
+ }
+
if (channel->rescind) {
ret = -ENODEV;
goto cleanup;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6277597d3d58..edd34c167a9b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -435,61 +435,16 @@ void vmbus_free_channels(void)
}
}
-/*
- * vmbus_process_offer - Process the offer by creating a channel/device
- * associated with this offer
- */
-static void vmbus_process_offer(struct vmbus_channel *newchannel)
+/* Note: the function can run concurrently for primary/sub channels. */
+static void vmbus_add_channel_work(struct work_struct *work)
{
- struct vmbus_channel *channel;
- bool fnew = true;
+ struct vmbus_channel *newchannel =
+ container_of(work, struct vmbus_channel, add_channel_work);
+ struct vmbus_channel *primary_channel = newchannel->primary_channel;
unsigned long flags;
u16 dev_type;
int ret;
- /* Make sure this is a new offer */
- mutex_lock(&vmbus_connection.channel_mutex);
-
- /*
- * Now that we have acquired the channel_mutex,
- * we can release the potentially racing rescind thread.
- */
- atomic_dec(&vmbus_connection.offer_in_progress);
-
- list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
- if (!uuid_le_cmp(channel->offermsg.offer.if_type,
- newchannel->offermsg.offer.if_type) &&
- !uuid_le_cmp(channel->offermsg.offer.if_instance,
- newchannel->offermsg.offer.if_instance)) {
- fnew = false;
- break;
- }
- }
-
- if (fnew)
- list_add_tail(&newchannel->listentry,
- &vmbus_connection.chn_list);
-
- mutex_unlock(&vmbus_connection.channel_mutex);
-
- if (!fnew) {
- /*
- * Check to see if this is a sub-channel.
- */
- if (newchannel->offermsg.offer.sub_channel_index != 0) {
- /*
- * Process the sub-channel.
- */
- newchannel->primary_channel = channel;
- spin_lock_irqsave(&channel->lock, flags);
- list_add_tail(&newchannel->sc_list, &channel->sc_list);
- channel->num_sc++;
- spin_unlock_irqrestore(&channel->lock, flags);
- } else {
- goto err_free_chan;
- }
- }
-
dev_type = hv_get_dev_type(newchannel);
init_vp_index(newchannel, dev_type);
@@ -507,27 +462,26 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
/*
* This state is used to indicate a successful open
* so that when we do close the channel normally, we
- * can cleanup properly
+ * can cleanup properly.
*/
newchannel->state = CHANNEL_OPEN_STATE;
- if (!fnew) {
- struct hv_device *dev
- = newchannel->primary_channel->device_obj;
+ if (primary_channel != NULL) {
+ /* newchannel is a sub-channel. */
+ struct hv_device *dev = primary_channel->device_obj;
if (vmbus_add_channel_kobj(dev, newchannel))
- goto err_free_chan;
+ goto err_deq_chan;
+
+ if (primary_channel->sc_creation_callback != NULL)
+ primary_channel->sc_creation_callback(newchannel);
- if (channel->sc_creation_callback != NULL)
- channel->sc_creation_callback(newchannel);
newchannel->probe_done = true;
return;
}
/*
- * Start the process of binding this offer to the driver
- * We need to set the DeviceObject field before calling
- * vmbus_child_dev_add()
+ * Start the process of binding the primary channel to the driver
*/
newchannel->device_obj = vmbus_device_create(
&newchannel->offermsg.offer.if_type,
@@ -556,13 +510,28 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
err_deq_chan:
mutex_lock(&vmbus_connection.channel_mutex);
- list_del(&newchannel->listentry);
+
+ /*
+ * We need to set the flag, otherwise
+ * vmbus_onoffer_rescind() can be blocked.
+ */
+ newchannel->probe_done = true;
+
+ if (primary_channel == NULL) {
+ list_del(&newchannel->listentry);
+ } else {
+ spin_lock_irqsave(&primary_channel->lock, flags);
+ list_del(&newchannel->sc_list);
+ spin_unlock_irqrestore(&primary_channel->lock, flags);
+ }
+
mutex_unlock(&vmbus_connection.channel_mutex);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
- percpu_channel_deq, newchannel, true);
+ percpu_channel_deq,
+ newchannel, true);
} else {
percpu_channel_deq(newchannel);
put_cpu();
@@ -570,14 +539,104 @@ err_deq_chan:
vmbus_release_relid(newchannel->offermsg.child_relid);
-err_free_chan:
free_channel(newchannel);
}
/*
+ * vmbus_process_offer - Process the offer by creating a channel/device
+ * associated with this offer
+ */
+static void vmbus_process_offer(struct vmbus_channel *newchannel)
+{
+ struct vmbus_channel *channel;
+ struct workqueue_struct *wq;
+ unsigned long flags;
+ bool fnew = true;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ /*
+ * Now that we have acquired the channel_mutex,
+ * we can release the potentially racing rescind thread.
+ */
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!uuid_le_cmp(channel->offermsg.offer.if_type,
+ newchannel->offermsg.offer.if_type) &&
+ !uuid_le_cmp(channel->offermsg.offer.if_instance,
+ newchannel->offermsg.offer.if_instance)) {
+ fnew = false;
+ break;
+ }
+ }
+
+ if (fnew)
+ list_add_tail(&newchannel->listentry,
+ &vmbus_connection.chn_list);
+ else {
+ /*
+ * Check to see if this is a valid sub-channel.
+ */
+ if (newchannel->offermsg.offer.sub_channel_index == 0) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ /*
+ * Don't call free_channel(), because newchannel->kobj
+ * is not initialized yet.
+ */
+ kfree(newchannel);
+ WARN_ON_ONCE(1);
+ return;
+ }
+ /*
+ * Process the sub-channel.
+ */
+ newchannel->primary_channel = channel;
+ spin_lock_irqsave(&channel->lock, flags);
+ list_add_tail(&newchannel->sc_list, &channel->sc_list);
+ spin_unlock_irqrestore(&channel->lock, flags);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ /*
+ * vmbus_process_offer() mustn't call channel->sc_creation_callback()
+ * directly for sub-channels, because sc_creation_callback() ->
+ * vmbus_open() may never get the host's response to the
+ * OPEN_CHANNEL message (the host may rescind a channel at any time,
+ * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
+ * may not wake up the vmbus_open() as it's blocked due to a non-zero
+ * vmbus_connection.offer_in_progress, and finally we have a deadlock.
+ *
+ * The above is also true for primary channels, if the related device
+ * drivers use sync probing mode by default.
+ *
+ * And, usually the handling of primary channels and sub-channels can
+ * depend on each other, so we should offload them to different
+ * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
+ * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
+ * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
+ * and waits for all the sub-channels to appear, but the latter
+ * can't get the rtnl_lock and this blocks the handling of
+ * sub-channels.
+ */
+ INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
+ wq = fnew ? vmbus_connection.handle_primary_chan_wq :
+ vmbus_connection.handle_sub_chan_wq;
+ queue_work(wq, &newchannel->add_channel_work);
+}
+
+/*
* We use this state to statically distribute the channel interrupt load.
*/
static int next_numa_node_id;
+/*
+ * init_vp_index() accesses global variables like next_numa_node_id, and
+ * it can run concurrently for primary channels and sub-channels: see
+ * vmbus_process_offer(), so we need the lock to protect the global
+ * variables.
+ */
+static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
/*
* Starting with Win8, we can statically distribute the incoming
@@ -613,6 +672,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
return;
}
+ spin_lock(&bind_channel_to_cpu_lock);
+
/*
* Based on the channel affinity policy, we will assign the NUMA
* nodes.
@@ -695,6 +756,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
channel->target_cpu = cur_cpu;
channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
+ spin_unlock(&bind_channel_to_cpu_lock);
+
free_cpumask_var(available_mask);
}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f4d08c8ac7f8..4fe117b761ce 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -190,6 +190,20 @@ int vmbus_connect(void)
goto cleanup;
}
+ vmbus_connection.handle_primary_chan_wq =
+ create_workqueue("hv_pri_chan");
+ if (!vmbus_connection.handle_primary_chan_wq) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ vmbus_connection.handle_sub_chan_wq =
+ create_workqueue("hv_sub_chan");
+ if (!vmbus_connection.handle_sub_chan_wq) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
spin_lock_init(&vmbus_connection.channelmsg_lock);
@@ -280,10 +294,14 @@ void vmbus_disconnect(void)
*/
vmbus_initiate_unload(false);
- if (vmbus_connection.work_queue) {
- drain_workqueue(vmbus_connection.work_queue);
+ if (vmbus_connection.handle_sub_chan_wq)
+ destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
+
+ if (vmbus_connection.handle_primary_chan_wq)
+ destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
+
+ if (vmbus_connection.work_queue)
destroy_workqueue(vmbus_connection.work_queue);
- }
if (vmbus_connection.int_page) {
free_pages((unsigned long)vmbus_connection.int_page, 0);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 72eaba3d50fc..87d3d7da78f8 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -335,7 +335,14 @@ struct vmbus_connection {
struct list_head chn_list;
struct mutex channel_mutex;
+ /*
+ * An offer message is handled first on the work_queue, and then
+ * is further handled on handle_primary_chan_wq or
+ * handle_sub_chan_wq.
+ */
struct workqueue_struct *work_queue;
+ struct workqueue_struct *handle_primary_chan_wq;
+ struct workqueue_struct *handle_sub_chan_wq;
};