diff options
author | Andrea Parri (Microsoft) <parri.andrea@gmail.com> | 2020-04-06 03:15:10 +0300 |
---|---|---|
committer | Wei Liu <wei.liu@kernel.org> | 2020-04-23 16:17:12 +0300 |
commit | 240ad77cb50d9f0a961fcb0f21e67939cf7a9c04 (patch) | |
tree | 43892c546d7475d93726ae96b239f782ccb8495b /drivers/pci/controller/pci-hyperv.c | |
parent | 9403b66e6161130ebae7e55a97491c84c1ad6f9f (diff) | |
download | linux-240ad77cb50d9f0a961fcb0f21e67939cf7a9c04.tar.xz |
PCI: hv: Prepare hv_compose_msi_msg() for the VMBus-channel-interrupt-to-vCPU reassignment functionality
The current implementation of hv_compose_msi_msg() is incompatible with
the new functionality that allows changing the vCPU a VMBus channel will
interrupt: if this function always calls hv_pci_onchannelcallback() in
the polling loop, the interrupt going to a different CPU could cause
hv_pci_onchannelcallback() to be running simultaneously in a tasklet,
which will break. The current code also has a problem in that it is not
synchronized with vmbus_reset_channel_cb(): hv_compose_msi_msg() could
be accessing the ring buffer via the call of hv_pci_onchannelcallback()
well after the time that vmbus_reset_channel_cb() has finished.
Fix these issues as follows. Disable the channel tasklet before
entering the polling loop in hv_compose_msi_msg() and re-enable it when
done. This will prevent hv_pci_onchannelcallback() from running in a
tasklet on a different CPU. Moreover, poll by always calling
hv_pci_onchannelcallback(), but check the channel callback function for
NULL and invoke the callback within a sched_lock critical section. This
will prevent hv_compose_msi_msg() from accessing the ring buffer after
vmbus_reset_channel_cb() has acquired the sched_lock spinlock.
Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Andrew Murray <amurray@thegoodpenguin.co.uk>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: <linux-pci@vger.kernel.org>
Link: https://lore.kernel.org/r/20200406001514.19876-8-parri.andrea@gmail.com
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Diffstat (limited to 'drivers/pci/controller/pci-hyperv.c')
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index e15022ff63e3..222ff5639ebe 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1356,11 +1356,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct irq_cfg *cfg = irqd_cfg(data); struct hv_pcibus_device *hbus; + struct vmbus_channel *channel; struct hv_pci_dev *hpdev; struct pci_bus *pbus; struct pci_dev *pdev; struct cpumask *dest; - unsigned long flags; struct compose_comp_ctxt comp; struct tran_int_desc *int_desc; struct { @@ -1378,6 +1378,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); + channel = hbus->hdev->channel; hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn)); if (!hpdev) goto return_null_message; @@ -1436,42 +1437,51 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) } /* + * Prevents hv_pci_onchannelcallback() from running concurrently + * in the tasklet. + */ + tasklet_disable(&channel->callback_event); + + /* * Since this function is called with IRQ locks held, can't * do normal wait for completion; instead poll. */ while (!try_wait_for_completion(&comp.comp_pkt.host_event)) { + unsigned long flags; + /* 0xFFFF means an invalid PCI VENDOR ID. */ if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) { dev_err_once(&hbus->hdev->device, "the device has gone\n"); - goto free_int_desc; + goto enable_tasklet; } /* - * When the higher level interrupt code calls us with - * interrupt disabled, we must poll the channel by calling - * the channel callback directly when channel->target_cpu is - * the current CPU. When the higher level interrupt code - * calls us with interrupt enabled, let's add the - * local_irq_save()/restore() to avoid race: - * hv_pci_onchannelcallback() can also run in tasklet. + * Make sure that the ring buffer data structure doesn't get + * freed while we dereference the ring buffer pointer. Test + * for the channel's onchannel_callback being NULL within a + * sched_lock critical section. See also the inline comments + * in vmbus_reset_channel_cb(). */ - local_irq_save(flags); - - if (hbus->hdev->channel->target_cpu == smp_processor_id()) - hv_pci_onchannelcallback(hbus); - - local_irq_restore(flags); + spin_lock_irqsave(&channel->sched_lock, flags); + if (unlikely(channel->onchannel_callback == NULL)) { + spin_unlock_irqrestore(&channel->sched_lock, flags); + goto enable_tasklet; + } + hv_pci_onchannelcallback(hbus); + spin_unlock_irqrestore(&channel->sched_lock, flags); if (hpdev->state == hv_pcichild_ejecting) { dev_err_once(&hbus->hdev->device, "the device is being ejected\n"); - goto free_int_desc; + goto enable_tasklet; } udelay(100); } + tasklet_enable(&channel->callback_event); + if (comp.comp_pkt.completion_status < 0) { dev_err(&hbus->hdev->device, "Request for interrupt failed: 0x%x", @@ -1495,6 +1505,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) put_pcichild(hpdev); return; +enable_tasklet: + tasklet_enable(&channel->callback_event); free_int_desc: kfree(int_desc); drop_reference: |