diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 23:13:03 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 23:21:29 +0300 |
commit | 0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch) | |
tree | d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /drivers/irqchip/irq-gic-v3-its.c | |
parent | 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff) | |
parent | 5c60a7389d795e001c8748b458eb76e3a5b6008c (diff) | |
download | linux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.tar.xz |
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window
Conflicts:
drivers/infiniband/core/rdma_core.c
- Use the rdma code and revise with the new spelling for
atomic_fetch_add_unless
drivers/nvme/host/rdma.c
- Replace max_sge with max_send_sge in new blk code
drivers/nvme/target/rdma.c
- Use the blk code and revise to use NULL for ib_post_recv when
appropriate
- Replace max_sge with max_recv_sge in new blk code
net/rds/ib_send.c
- Use the net code and revise to use NULL for ib_post_recv when
appropriate
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/irqchip/irq-gic-v3-its.c')
-rw-r--r-- | drivers/irqchip/irq-gic-v3-its.c | 243 |
1 file changed, 156 insertions, 87 deletions
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d7842d312d3e..316a57530f6d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -23,6 +23,8 @@ #include <linux/dma-iommu.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> +#include <linux/list.h> +#include <linux/list_sort.h> #include <linux/log2.h> #include <linux/mm.h> #include <linux/msi.h> @@ -160,7 +162,7 @@ static struct { } vpe_proxy; static LIST_HEAD(its_nodes); -static DEFINE_SPINLOCK(its_lock); +static DEFINE_RAW_SPINLOCK(its_lock); static struct rdists *gic_rdists; static struct irq_domain *its_parent; @@ -1421,112 +1423,176 @@ static struct irq_chip its_irq_chip = { .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, }; + /* * How we allocate LPIs: * - * The GIC has id_bits bits for interrupt identifiers. From there, we - * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as - * we allocate LPIs by chunks of 32, we can shift the whole thing by 5 - * bits to the right. + * lpi_range_list contains ranges of LPIs that are available to + * allocate from. To allocate LPIs, just pick the first range that + * fits the required allocation, and reduce it by the required + * amount. Once empty, remove the range from the list. + * + * To free a range of LPIs, add a free range to the list, sort it and + * merge the result if the new range happens to be adjacent to an + * already free block. * - * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. + * The consequence of the above is that allocation cost is low, but + * freeing is expensive. We assume that freeing rarely occurs.
*/ -#define IRQS_PER_CHUNK_SHIFT 5 -#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) -#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ -static unsigned long *lpi_bitmap; -static u32 lpi_chunks; -static DEFINE_SPINLOCK(lpi_lock); +static DEFINE_MUTEX(lpi_range_lock); +static LIST_HEAD(lpi_range_list); + +struct lpi_range { + struct list_head entry; + u32 base_id; + u32 span; +}; -static int its_lpi_to_chunk(int lpi) +static struct lpi_range *mk_lpi_range(u32 base, u32 span) { - return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT; + struct lpi_range *range; + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (range) { + INIT_LIST_HEAD(&range->entry); + range->base_id = base; + range->span = span; + } + + return range; } -static int its_chunk_to_lpi(int chunk) +static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b) { - return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; + struct lpi_range *ra, *rb; + + ra = container_of(a, struct lpi_range, entry); + rb = container_of(b, struct lpi_range, entry); + + return rb->base_id - ra->base_id; } -static int __init its_lpi_init(u32 id_bits) +static void merge_lpi_ranges(void) { - lpi_chunks = its_lpi_to_chunk(1UL << id_bits); + struct lpi_range *range, *tmp; - lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long), - GFP_KERNEL); - if (!lpi_bitmap) { - lpi_chunks = 0; - return -ENOMEM; + list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) { + if (!list_is_last(&range->entry, &lpi_range_list) && + (tmp->base_id == (range->base_id + range->span))) { + tmp->base_id = range->base_id; + tmp->span += range->span; + list_del(&range->entry); + kfree(range); + } } +} - pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks); - return 0; +static int alloc_lpi_range(u32 nr_lpis, u32 *base) +{ + struct lpi_range *range, *tmp; + int err = -ENOSPC; + + mutex_lock(&lpi_range_lock); + + list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) { + if (range->span >= nr_lpis) { + *base = range->base_id; + 
range->base_id += nr_lpis; + range->span -= nr_lpis; + + if (range->span == 0) { + list_del(&range->entry); + kfree(range); + } + + err = 0; + break; + } + } + + mutex_unlock(&lpi_range_lock); + + pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis); + return err; } -static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) +static int free_lpi_range(u32 base, u32 nr_lpis) { - unsigned long *bitmap = NULL; - int chunk_id; - int nr_chunks; - int i; + struct lpi_range *new; + int err = 0; + + mutex_lock(&lpi_range_lock); + + new = mk_lpi_range(base, nr_lpis); + if (!new) { + err = -ENOMEM; + goto out; + } + + list_add(&new->entry, &lpi_range_list); + list_sort(NULL, &lpi_range_list, lpi_range_cmp); + merge_lpi_ranges(); +out: + mutex_unlock(&lpi_range_lock); + return err; +} + +static int __init its_lpi_init(u32 id_bits) +{ + u32 lpis = (1UL << id_bits) - 8192; + u32 numlpis; + int err; + + numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer); + + if (numlpis > 2 && !WARN_ON(numlpis > lpis)) { + lpis = numlpis; + pr_info("ITS: Using hypervisor restricted LPI range [%u]\n", + lpis); + } - nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); + /* + * Initializing the allocator is just the same as freeing the + * full range of LPIs. 
+ */ + err = free_lpi_range(8192, lpis); + pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis); + return err; +} - spin_lock(&lpi_lock); +static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) +{ + unsigned long *bitmap = NULL; + int err = 0; do { - chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, - 0, nr_chunks, 0); - if (chunk_id < lpi_chunks) + err = alloc_lpi_range(nr_irqs, base); + if (!err) break; - nr_chunks--; - } while (nr_chunks > 0); + nr_irqs /= 2; + } while (nr_irqs > 0); - if (!nr_chunks) + if (err) goto out; - bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK), - sizeof(long), - GFP_ATOMIC); + bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC); if (!bitmap) goto out; - for (i = 0; i < nr_chunks; i++) - set_bit(chunk_id + i, lpi_bitmap); - - *base = its_chunk_to_lpi(chunk_id); - *nr_ids = nr_chunks * IRQS_PER_CHUNK; + *nr_ids = nr_irqs; out: - spin_unlock(&lpi_lock); - if (!bitmap) *base = *nr_ids = 0; return bitmap; } -static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) +static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids) { - int lpi; - - spin_lock(&lpi_lock); - - for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) { - int chunk = its_lpi_to_chunk(lpi); - - BUG_ON(chunk > lpi_chunks); - if (test_bit(chunk, lpi_bitmap)) { - clear_bit(chunk, lpi_bitmap); - } else { - pr_err("Bad LPI chunk %d\n", chunk); - } - } - - spin_unlock(&lpi_lock); - + WARN_ON(free_lpi_range(base, nr_ids)); kfree(bitmap); } @@ -1559,7 +1625,7 @@ static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; - lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS); + lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer); gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); if (!gic_rdists->prop_page) { pr_err("Failed to allocate PROPBASE\n"); @@ -1997,12 +2063,12 @@ static void its_cpu_init_collections(void) { struct its_node *its; - 
spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) its_cpu_init_collection(its); - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); } static struct its_device *its_find_device(struct its_node *its, u32 dev_id) @@ -2134,17 +2200,20 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, if (!its_alloc_device_table(its, dev_id)) return NULL; + if (WARN_ON(!is_power_of_2(nvecs))) + nvecs = roundup_pow_of_two(nvecs); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); /* - * We allocate at least one chunk worth of LPIs bet device, - * and thus that many ITEs. The device may require less though. + * Even if the device wants a single LPI, the ITT must be + * sized as a power of two (and you need at least one bit...). */ - nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs)); + nr_ites = max(2, nvecs); sz = nr_ites * its->ite_size; sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); if (alloc_lpis) { - lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); + lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) col_map = kcalloc(nr_lpis, sizeof(*col_map), GFP_KERNEL); @@ -2379,9 +2448,9 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, /* If all interrupts have been freed, start mopping the floor */ if (bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { - its_lpi_free_chunks(its_dev->event_map.lpi_map, - its_dev->event_map.lpi_base, - its_dev->event_map.nr_lpis); + its_lpi_free(its_dev->event_map.lpi_map, + its_dev->event_map.lpi_base, + its_dev->event_map.nr_lpis); kfree(its_dev->event_map.col_map); /* Unmap device/itt */ @@ -2780,7 +2849,7 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain, } if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { - its_lpi_free_chunks(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); + its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); 
its_free_prop_table(vm->vprop_page); } } @@ -2795,18 +2864,18 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq BUG_ON(!vm); - bitmap = its_lpi_alloc_chunks(nr_irqs, &base, &nr_ids); + bitmap = its_lpi_alloc(roundup_pow_of_two(nr_irqs), &base, &nr_ids); if (!bitmap) return -ENOMEM; if (nr_ids < nr_irqs) { - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); return -ENOMEM; } vprop_page = its_allocate_prop_table(GFP_KERNEL); if (!vprop_page) { - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); return -ENOMEM; } @@ -2833,7 +2902,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq if (i > 0) its_vpe_irq_domain_free(domain, virq, i - 1); - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); its_free_prop_table(vprop_page); } @@ -3070,7 +3139,7 @@ static int its_save_disable(void) struct its_node *its; int err = 0; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) { void __iomem *base; @@ -3102,7 +3171,7 @@ err: writel_relaxed(its->ctlr_save, base + GITS_CTLR); } } - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); return err; } @@ -3112,7 +3181,7 @@ static void its_restore_enable(void) struct its_node *its; int ret; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) { void __iomem *base; int i; @@ -3164,7 +3233,7 @@ static void its_restore_enable(void) GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER))) its_cpu_init_collection(its); } - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); } static struct syscore_ops its_syscore_ops = { @@ -3398,9 +3467,9 @@ static int __init its_probe_one(struct resource *res, if (err) goto out_free_tables; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_add(&its->entry, &its_nodes); - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); return 0; |