diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 23:21:47 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-05 23:21:47 +0300 |
commit | 78f860135433a8bba406352fbdcea8e8980583bf (patch) | |
tree | 0b7a9ba320e38b5d6eb0fb982bc2d9449aaf57f3 /drivers/nvme | |
parent | 18483190e7a2a6761b67c6824a31adf5b2b7be51 (diff) | |
parent | a324ca9cad4736252c33c1e28cffe1d87f262d03 (diff) | |
download | linux-78f860135433a8bba406352fbdcea8e8980583bf.tar.xz |
Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq updates from Thomas Gleixner:
"The interrupt departement delivers this time:
- New infrastructure to manage NMIs on platforms which have a sane
NMI delivery, i.e. identifiable NMI vectors instead of a single
lump.
- Simplification of the interrupt affinity management so drivers
don't have to implement ugly loops around the PCI/MSI enablement.
- Speedup for interrupt statistics in /proc/stat
- Provide a function to retrieve the default irq domain
- A new interrupt controller for the Loongson LS1X platform
- Affinity support for the SiFive PLIC
- Better support for the iMX irqsteer driver
- NUMA aware memory allocations for GICv3
- The usual small fixes, improvements and cleanups all over the
place"
* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
irqchip/imx-irqsteer: Add multi output interrupts support
irqchip/imx-irqsteer: Change to use reg_num instead of irq_group
dt-bindings: irq: imx-irqsteer: Add multi output interrupts support
dt-binding: irq: imx-irqsteer: Use irq number instead of group number
irqchip/brcmstb-l2: Use _irqsave locking variants in non-interrupt code
irqchip/gicv3-its: Use NUMA aware memory allocation for ITS tables
irqdomain: Allow the default irq domain to be retrieved
irqchip/sifive-plic: Implement irq_set_affinity() for SMP host
irqchip/sifive-plic: Differentiate between PLIC handler and context
irqchip/sifive-plic: Add warning in plic_init() if handler already present
irqchip/sifive-plic: Pre-compute context hart base and enable base
PCI/MSI: Remove obsolete sanity checks for multiple interrupt sets
genirq/affinity: Remove the leftovers of the original set support
nvme-pci: Simplify interrupt allocation
genirq/affinity: Add new callback for (re)calculating interrupt sets
genirq/affinity: Store interrupt sets size in struct irq_affinity
genirq/affinity: Code consolidation
irqchip/irq-sifive-plic: Check and continue in case of an invalid cpuid.
irqchip/i8259: Fix shutdown order by moving syscore_ops registration
dt-bindings: interrupt-controller: loongson ls1x intc
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/pci.c | 117 |
1 files changed, 39 insertions, 78 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7fee665ec45e..e905861186e3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2041,53 +2041,52 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) return ret; } -/* irq_queues covers admin queue */ -static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) +/* + * nirqs is the number of interrupts available for write and read + * queues. The core already reserved an interrupt for the admin queue. + */ +static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) { - unsigned int this_w_queues = write_queues; - - WARN_ON(!irq_queues); - - /* - * Setup read/write queue split, assign admin queue one independent - * irq vector if irq_queues is > 1. - */ - if (irq_queues <= 2) { - dev->io_queues[HCTX_TYPE_DEFAULT] = 1; - dev->io_queues[HCTX_TYPE_READ] = 0; - return; - } - - /* - * If 'write_queues' is set, ensure it leaves room for at least - * one read queue and one admin queue - */ - if (this_w_queues >= irq_queues) - this_w_queues = irq_queues - 2; + struct nvme_dev *dev = affd->priv; + unsigned int nr_read_queues; /* - * If 'write_queues' is set to zero, reads and writes will share - * a queue set. + * If there is no interupt available for queues, ensure that + * the default queue is set to 1. The affinity set size is + * also set to one, but the irq core ignores it for this case. + * + * If only one interrupt is available or 'write_queue' == 0, combine + * write and read queues. + * + * If 'write_queues' > 0, ensure it leaves room for at least one read + * queue. */ - if (!this_w_queues) { - dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1; - dev->io_queues[HCTX_TYPE_READ] = 0; + if (!nrirqs) { + nrirqs = 1; + nr_read_queues = 0; + } else if (nrirqs == 1 || !write_queues) { + nr_read_queues = 0; + } else if (write_queues >= nrirqs) { + nr_read_queues = 1; } else { - dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; - dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1; + nr_read_queues = nrirqs - write_queues; } + + dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; + affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; + dev->io_queues[HCTX_TYPE_READ] = nr_read_queues; + affd->set_size[HCTX_TYPE_READ] = nr_read_queues; + affd->nr_sets = nr_read_queues ? 2 : 1; } static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) { struct pci_dev *pdev = to_pci_dev(dev->dev); - int irq_sets[2]; struct irq_affinity affd = { - .pre_vectors = 1, - .nr_sets = ARRAY_SIZE(irq_sets), - .sets = irq_sets, + .pre_vectors = 1, + .calc_sets = nvme_calc_irq_sets, + .priv = dev, }; - int result = 0; unsigned int irq_queues, this_p_queues; /* @@ -2103,51 +2102,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; - /* - * For irq sets, we have to ask for minvec == maxvec. This passes - * any reduction back to us, so we can adjust our queue counts and - * IRQ vector needs. - */ - do { - nvme_calc_io_queues(dev, irq_queues); - irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT]; - irq_sets[1] = dev->io_queues[HCTX_TYPE_READ]; - if (!irq_sets[1]) - affd.nr_sets = 1; - - /* - * If we got a failure and we're down to asking for just - * 1 + 1 queues, just ask for a single vector. We'll share - * that between the single IO queue and the admin queue. - * Otherwise, we assign one independent vector to admin queue. - */ - if (irq_queues > 1) - irq_queues = irq_sets[0] + irq_sets[1] + 1; + /* Initialize for the single interrupt case */ + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; - result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, - irq_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); - - /* - * Need to reduce our vec counts. If we get ENOSPC, the - * platform should support mulitple vecs, we just need - * to decrease our ask. If we get EINVAL, the platform - * likely does not. Back down to ask for just one vector. - */ - if (result == -ENOSPC) { - irq_queues--; - if (!irq_queues) - return result; - continue; - } else if (result == -EINVAL) { - irq_queues = 1; - continue; - } else if (result <= 0) - return -EIO; - break; - } while (1); - - return result; + return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); } static void nvme_disable_io_queues(struct nvme_dev *dev) @@ -3024,6 +2984,7 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { + BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); return pci_register_driver(&nvme_driver); } |