diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-11-25 23:32:42 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-11-25 23:32:42 +0300 |
commit | 081f359ef5334b0e034979e4e930c2ce80f3001b (patch) | |
tree | 9255c89731446ccc5735b2f6c9074bdc20a3440e /drivers | |
parent | 0b1dcc2cf55ae6523c6fbd0d741b3ac28c9f4536 (diff) | |
parent | 25c94b051592c010abe92c85b0485f1faedc83f3 (diff) | |
download | linux-081f359ef5334b0e034979e4e930c2ce80f3001b.tar.xz |
Merge tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv fixes from Wei Liu:
- Fix IRTE allocation in Hyper-V PCI controller (Dexuan Cui)
- Fix handling of SCSI srb_status and capacity change events (Michael
Kelley)
- Restore VP assist page after CPU offlining and onlining (Vitaly
Kuznetsov)
- Fix some memory leak issues in VMBus (Yang Yingliang)
* tag 'hyperv-fixes-signed-20221125' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register()
Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work()
PCI: hv: Only reuse existing IRTE allocation for Multi-MSI
scsi: storvsc: Fix handling of srb_status and capacity change events
x86/hyperv: Restore VP assist page after cpu offlining/onlining
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/hv/channel_mgmt.c | 6 |
-rw-r--r-- | drivers/hv/vmbus_drv.c | 1 |
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 90 |
-rw-r--r-- | drivers/scsi/storvsc_drv.c | 69 |
4 files changed, 115 insertions, 51 deletions
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b120402d405..cc23b90cae02 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. + * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. */ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..e592c481f7ae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..f1ec8931dfbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1613,7 +1613,7 @@ out: } static u32 hv_compose_msi_req_v1( - struct pci_create_interrupt *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt *int_pkt, u32 slot, u8 vector, u16 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; @@ -1632,6 +1632,35 @@ static u32 hv_compose_msi_req_v1( } /* + * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and + * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be + * interrupted is specified later in 
hv_irq_unmask() and communicated to Hyper-V + * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is + * not irrelevant because Hyper-V chooses the physical CPU to handle the + * interrupts based on the vCPU specified in message sent to the vPCI VSP in + * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest, + * but assigning too many vPCI device interrupts to the same pCPU can cause a + * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V + * to spread out the pCPUs that it selects. + * + * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu() + * to always return the same dummy vCPU, because a second call to + * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a + * new pCPU for the interrupt. But for the multi-MSI case, the second call to + * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the + * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that + * the pCPUs are spread out. All interrupts for a multi-MSI device end up using + * the same pCPU, even though the vCPUs will be spread out by later calls + * to hv_irq_unmask(), but that is the best we can do now. + * + * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not* + * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an + * enhancement is planned for a future version. With that enhancement, the + * dummy vCPU selection won't matter, and interrupts for the same multi-MSI + * device will be spread across multiple pCPUs. + */ + +/* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). 
*/ @@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) return cpumask_first_and(affinity, cpu_online_mask); } -static u32 hv_compose_msi_req_v2( - struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity, - u32 slot, u8 vector, u16 vector_count) +/* + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0. + */ +static int hv_compose_multi_msi_req_get_cpu(void) { + static DEFINE_SPINLOCK(multi_msi_cpu_lock); + + /* -1 means starting with CPU 0 */ + static int cpu_next = -1; + + unsigned long flags; int cpu; + spin_lock_irqsave(&multi_msi_cpu_lock, flags); + + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, + false); + cpu = cpu_next; + + spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); + + return cpu; +} + +static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, int cpu, + u32 slot, u8 vector, u16 vector_count) +{ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2( } static u32 hv_compose_msi_req_v3( - struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt3 *int_pkt, int cpu, u32 slot, u32 vector, u16 vector_count) { - int cpu; - int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ 
-1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_create_interrupt3 v3; } int_pkts; } __packed ctxt; + bool multi_msi; u64 trans_id; u32 size; int ret; + int cpu; + + msi_desc = irq_data_get_msi_desc(data); + multi_msi = !msi_desc->pci.msi_attrib.is_msix && + msi_desc->nvec_used > 1; /* Reuse the previous allocation */ - if (data->chip_data) { + if (data->chip_data && multi_msi) { int_desc = data->chip_data; msg->address_hi = int_desc->address >> 32; msg->address_lo = int_desc->address & 0xffffffff; @@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - msi_desc = irq_data_get_msi_desc(data); pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; + /* Free any previous message that might have already been composed. */ + if (data->chip_data && !multi_msi) { + int_desc = data->chip_data; + data->chip_data = NULL; + hv_int_desc_free(hpdev, int_desc); + } + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; - if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) { + if (multi_msi) { /* * If this is not the first MSI of Multi MSI, we already have * a mapping. Can exit early. 
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ vector = 32; vector_count = msi_desc->nvec_used; + cpu = hv_compose_multi_msi_req_get_cpu(); } else { vector = hv_msi_get_int_vector(data); vector_count = 1; + cpu = hv_compose_msi_req_get_cpu(dest); } /* @@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, - dest, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_2: case PCI_PROTOCOL_VERSION_1_3: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, - dest, + cpu, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, - dest, + cpu, hpdev->desc.win_slot.slot, vector, vector_count); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..3c5b7e4227b2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -303,16 +303,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. 
*/ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ - - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. 
+ * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. - */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; |