diff options
39 files changed, 1752 insertions, 394 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 61f6ccc19cfa..6be9ee148b78 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -23,7 +23,6 @@ ifeq ($(CONFIG_ARM_MODULE_PLTS),y) LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds endif -OBJCOPYFLAGS :=-O binary -R .comment -S GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index bdc1d5af03d2..50f8d1be7fcb 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -11,6 +11,8 @@ # Copyright (C) 1995-2002 Russell King # +OBJCOPYFLAGS :=-O binary -R .comment -S + ifneq ($(MACHINE),) include $(MACHINE)/Makefile.boot endif diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 2e076c492005..4ecd5120fce7 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -15,6 +15,7 @@ * from machine specific code with proper arguments when required. */ #include <linux/module.h> +#include <linux/gpio/driver.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/kernel.h> @@ -107,6 +108,7 @@ struct sa1111 { spinlock_t lock; void __iomem *base; struct sa1111_platform_data *pdata; + struct gpio_chip gc; #ifdef CONFIG_PM void *saved_state; #endif @@ -231,132 +233,44 @@ static void sa1111_irq_handler(struct irq_desc *desc) #define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base)) #define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32)) -static void sa1111_ack_irq(struct irq_data *d) -{ -} - -static void sa1111_mask_lowirq(struct irq_data *d) +static u32 sa1111_irqmask(struct irq_data *d) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned long ie0; - ie0 = sa1111_readl(mapbase + SA1111_INTEN0); - ie0 &= ~SA1111_IRQMASK_LO(d->irq); - writel(ie0, mapbase + SA1111_INTEN0); + return BIT((d->irq - sachip->irq_base) & 31); } -static void sa1111_unmask_lowirq(struct irq_data *d) +static int sa1111_irqbank(struct irq_data *d) { struct sa1111 *sachip 
= irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned long ie0; - - ie0 = sa1111_readl(mapbase + SA1111_INTEN0); - ie0 |= SA1111_IRQMASK_LO(d->irq); - sa1111_writel(ie0, mapbase + SA1111_INTEN0); -} - -/* - * Attempt to re-trigger the interrupt. The SA1111 contains a register - * (INTSET) which claims to do this. However, in practice no amount of - * manipulation of INTEN and INTSET guarantees that the interrupt will - * be triggered. In fact, its very difficult, if not impossible to get - * INTSET to re-trigger the interrupt. - */ -static int sa1111_retrigger_lowirq(struct irq_data *d) -{ - struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_LO(d->irq); - unsigned long ip0; - int i; - - ip0 = sa1111_readl(mapbase + SA1111_INTPOL0); - for (i = 0; i < 8; i++) { - sa1111_writel(ip0 ^ mask, mapbase + SA1111_INTPOL0); - sa1111_writel(ip0, mapbase + SA1111_INTPOL0); - if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask) - break; - } - if (i == 8) - pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n", - d->irq); - return i == 8 ? 
-1 : 0; + return ((d->irq - sachip->irq_base) / 32) * 4; } -static int sa1111_type_lowirq(struct irq_data *d, unsigned int flags) -{ - struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_LO(d->irq); - unsigned long ip0; - - if (flags == IRQ_TYPE_PROBE) - return 0; - - if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0) - return -EINVAL; - - ip0 = sa1111_readl(mapbase + SA1111_INTPOL0); - if (flags & IRQ_TYPE_EDGE_RISING) - ip0 &= ~mask; - else - ip0 |= mask; - sa1111_writel(ip0, mapbase + SA1111_INTPOL0); - sa1111_writel(ip0, mapbase + SA1111_WAKEPOL0); - - return 0; -} - -static int sa1111_wake_lowirq(struct irq_data *d, unsigned int on) +static void sa1111_ack_irq(struct irq_data *d) { - struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_LO(d->irq); - unsigned long we0; - - we0 = sa1111_readl(mapbase + SA1111_WAKEEN0); - if (on) - we0 |= mask; - else - we0 &= ~mask; - sa1111_writel(we0, mapbase + SA1111_WAKEEN0); - - return 0; } -static struct irq_chip sa1111_low_chip = { - .name = "SA1111-l", - .irq_ack = sa1111_ack_irq, - .irq_mask = sa1111_mask_lowirq, - .irq_unmask = sa1111_unmask_lowirq, - .irq_retrigger = sa1111_retrigger_lowirq, - .irq_set_type = sa1111_type_lowirq, - .irq_set_wake = sa1111_wake_lowirq, -}; - -static void sa1111_mask_highirq(struct irq_data *d) +static void sa1111_mask_irq(struct irq_data *d) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned long ie1; + void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); + u32 ie; - ie1 = sa1111_readl(mapbase + SA1111_INTEN1); - ie1 &= ~SA1111_IRQMASK_HI(d->irq); - sa1111_writel(ie1, mapbase + SA1111_INTEN1); + ie = sa1111_readl(mapbase + SA1111_INTEN0); + ie &= ~sa1111_irqmask(d); + sa1111_writel(ie, 
mapbase + SA1111_INTEN0); } -static void sa1111_unmask_highirq(struct irq_data *d) +static void sa1111_unmask_irq(struct irq_data *d) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned long ie1; + void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); + u32 ie; - ie1 = sa1111_readl(mapbase + SA1111_INTEN1); - ie1 |= SA1111_IRQMASK_HI(d->irq); - sa1111_writel(ie1, mapbase + SA1111_INTEN1); + ie = sa1111_readl(mapbase + SA1111_INTEN0); + ie |= sa1111_irqmask(d); + sa1111_writel(ie, mapbase + SA1111_INTEN0); } /* @@ -366,19 +280,18 @@ static void sa1111_unmask_highirq(struct irq_data *d) * be triggered. In fact, its very difficult, if not impossible to get * INTSET to re-trigger the interrupt. */ -static int sa1111_retrigger_highirq(struct irq_data *d) +static int sa1111_retrigger_irq(struct irq_data *d) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_HI(d->irq); - unsigned long ip1; + void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); + u32 ip, mask = sa1111_irqmask(d); int i; - ip1 = sa1111_readl(mapbase + SA1111_INTPOL1); + ip = sa1111_readl(mapbase + SA1111_INTPOL0); for (i = 0; i < 8; i++) { - sa1111_writel(ip1 ^ mask, mapbase + SA1111_INTPOL1); - sa1111_writel(ip1, mapbase + SA1111_INTPOL1); - if (sa1111_readl(mapbase + SA1111_INTSTATCLR1) & mask) + sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0); + sa1111_writel(ip, mapbase + SA1111_INTPOL0); + if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask) break; } @@ -388,12 +301,11 @@ static int sa1111_retrigger_highirq(struct irq_data *d) return i == 8 ? 
-1 : 0; } -static int sa1111_type_highirq(struct irq_data *d, unsigned int flags) +static int sa1111_type_irq(struct irq_data *d, unsigned int flags) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_HI(d->irq); - unsigned long ip1; + void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); + u32 ip, mask = sa1111_irqmask(d); if (flags == IRQ_TYPE_PROBE) return 0; @@ -401,42 +313,41 @@ static int sa1111_type_highirq(struct irq_data *d, unsigned int flags) if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0) return -EINVAL; - ip1 = sa1111_readl(mapbase + SA1111_INTPOL1); + ip = sa1111_readl(mapbase + SA1111_INTPOL0); if (flags & IRQ_TYPE_EDGE_RISING) - ip1 &= ~mask; + ip &= ~mask; else - ip1 |= mask; - sa1111_writel(ip1, mapbase + SA1111_INTPOL1); - sa1111_writel(ip1, mapbase + SA1111_WAKEPOL1); + ip |= mask; + sa1111_writel(ip, mapbase + SA1111_INTPOL0); + sa1111_writel(ip, mapbase + SA1111_WAKEPOL0); return 0; } -static int sa1111_wake_highirq(struct irq_data *d, unsigned int on) +static int sa1111_wake_irq(struct irq_data *d, unsigned int on) { struct sa1111 *sachip = irq_data_get_irq_chip_data(d); - void __iomem *mapbase = sachip->base + SA1111_INTC; - unsigned int mask = SA1111_IRQMASK_HI(d->irq); - unsigned long we1; + void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d); + u32 we, mask = sa1111_irqmask(d); - we1 = sa1111_readl(mapbase + SA1111_WAKEEN1); + we = sa1111_readl(mapbase + SA1111_WAKEEN0); if (on) - we1 |= mask; + we |= mask; else - we1 &= ~mask; - sa1111_writel(we1, mapbase + SA1111_WAKEEN1); + we &= ~mask; + sa1111_writel(we, mapbase + SA1111_WAKEEN0); return 0; } -static struct irq_chip sa1111_high_chip = { - .name = "SA1111-h", +static struct irq_chip sa1111_irq_chip = { + .name = "SA1111", .irq_ack = sa1111_ack_irq, - .irq_mask = sa1111_mask_highirq, - .irq_unmask = sa1111_unmask_highirq, - 
.irq_retrigger = sa1111_retrigger_highirq, - .irq_set_type = sa1111_type_highirq, - .irq_set_wake = sa1111_wake_highirq, + .irq_mask = sa1111_mask_irq, + .irq_unmask = sa1111_unmask_irq, + .irq_retrigger = sa1111_retrigger_irq, + .irq_set_type = sa1111_type_irq, + .irq_set_wake = sa1111_wake_irq, }; static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) @@ -482,16 +393,14 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) for (i = IRQ_GPAIN0; i <= SSPROR; i++) { irq = sachip->irq_base + i; - irq_set_chip_and_handler(irq, &sa1111_low_chip, - handle_edge_irq); + irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq); irq_set_chip_data(irq, sachip); irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) { irq = sachip->irq_base + i; - irq_set_chip_and_handler(irq, &sa1111_high_chip, - handle_edge_irq); + irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq); irq_set_chip_data(irq, sachip); irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } @@ -509,6 +418,181 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) return 0; } +static void sa1111_remove_irq(struct sa1111 *sachip) +{ + void __iomem *irqbase = sachip->base + SA1111_INTC; + + /* disable all IRQs */ + sa1111_writel(0, irqbase + SA1111_INTEN0); + sa1111_writel(0, irqbase + SA1111_INTEN1); + sa1111_writel(0, irqbase + SA1111_WAKEEN0); + sa1111_writel(0, irqbase + SA1111_WAKEEN1); + + if (sachip->irq != NO_IRQ) { + irq_set_chained_handler_and_data(sachip->irq, NULL, NULL); + irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); + + release_mem_region(sachip->phys + SA1111_INTC, 512); + } +} + +enum { + SA1111_GPIO_PXDDR = (SA1111_GPIO_PADDR - SA1111_GPIO_PADDR), + SA1111_GPIO_PXDRR = (SA1111_GPIO_PADRR - SA1111_GPIO_PADDR), + SA1111_GPIO_PXDWR = (SA1111_GPIO_PADWR - SA1111_GPIO_PADDR), + SA1111_GPIO_PXSDR = (SA1111_GPIO_PASDR - SA1111_GPIO_PADDR), + SA1111_GPIO_PXSSR = 
(SA1111_GPIO_PASSR - SA1111_GPIO_PADDR), +}; + +static struct sa1111 *gc_to_sa1111(struct gpio_chip *gc) +{ + return container_of(gc, struct sa1111, gc); +} + +static void __iomem *sa1111_gpio_map_reg(struct sa1111 *sachip, unsigned offset) +{ + void __iomem *reg = sachip->base + SA1111_GPIO; + + if (offset < 4) + return reg + SA1111_GPIO_PADDR; + if (offset < 10) + return reg + SA1111_GPIO_PBDDR; + if (offset < 18) + return reg + SA1111_GPIO_PCDDR; + return NULL; +} + +static u32 sa1111_gpio_map_bit(unsigned offset) +{ + if (offset < 4) + return BIT(offset); + if (offset < 10) + return BIT(offset - 4); + if (offset < 18) + return BIT(offset - 10); + return 0; +} + +static void sa1111_gpio_modify(void __iomem *reg, u32 mask, u32 set) +{ + u32 val; + + val = readl_relaxed(reg); + val &= ~mask; + val |= mask & set; + writel_relaxed(val, reg); +} + +static int sa1111_gpio_get_direction(struct gpio_chip *gc, unsigned offset) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + void __iomem *reg = sa1111_gpio_map_reg(sachip, offset); + u32 mask = sa1111_gpio_map_bit(offset); + + return !!(readl_relaxed(reg + SA1111_GPIO_PXDDR) & mask); +} + +static int sa1111_gpio_direction_input(struct gpio_chip *gc, unsigned offset) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + unsigned long flags; + void __iomem *reg = sa1111_gpio_map_reg(sachip, offset); + u32 mask = sa1111_gpio_map_bit(offset); + + spin_lock_irqsave(&sachip->lock, flags); + sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, mask); + sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, mask); + spin_unlock_irqrestore(&sachip->lock, flags); + + return 0; +} + +static int sa1111_gpio_direction_output(struct gpio_chip *gc, unsigned offset, + int value) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + unsigned long flags; + void __iomem *reg = sa1111_gpio_map_reg(sachip, offset); + u32 mask = sa1111_gpio_map_bit(offset); + + spin_lock_irqsave(&sachip->lock, flags); + sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? 
mask : 0); + sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0); + sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, 0); + sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, 0); + spin_unlock_irqrestore(&sachip->lock, flags); + + return 0; +} + +static int sa1111_gpio_get(struct gpio_chip *gc, unsigned offset) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + void __iomem *reg = sa1111_gpio_map_reg(sachip, offset); + u32 mask = sa1111_gpio_map_bit(offset); + + return !!(readl_relaxed(reg + SA1111_GPIO_PXDRR) & mask); +} + +static void sa1111_gpio_set(struct gpio_chip *gc, unsigned offset, int value) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + unsigned long flags; + void __iomem *reg = sa1111_gpio_map_reg(sachip, offset); + u32 mask = sa1111_gpio_map_bit(offset); + + spin_lock_irqsave(&sachip->lock, flags); + sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? mask : 0); + sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0); + spin_unlock_irqrestore(&sachip->lock, flags); +} + +static void sa1111_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, + unsigned long *bits) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + unsigned long flags; + void __iomem *reg = sachip->base + SA1111_GPIO; + u32 msk, val; + + msk = *mask; + val = *bits; + + spin_lock_irqsave(&sachip->lock, flags); + sa1111_gpio_modify(reg + SA1111_GPIO_PADWR, msk & 15, val); + sa1111_gpio_modify(reg + SA1111_GPIO_PASSR, msk & 15, val); + sa1111_gpio_modify(reg + SA1111_GPIO_PBDWR, (msk >> 4) & 255, val >> 4); + sa1111_gpio_modify(reg + SA1111_GPIO_PBSSR, (msk >> 4) & 255, val >> 4); + sa1111_gpio_modify(reg + SA1111_GPIO_PCDWR, (msk >> 12) & 255, val >> 12); + sa1111_gpio_modify(reg + SA1111_GPIO_PCSSR, (msk >> 12) & 255, val >> 12); + spin_unlock_irqrestore(&sachip->lock, flags); +} + +static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset) +{ + struct sa1111 *sachip = gc_to_sa1111(gc); + + return sachip->irq_base + offset; +} + +static 
int sa1111_setup_gpios(struct sa1111 *sachip) +{ + sachip->gc.label = "sa1111"; + sachip->gc.parent = sachip->dev; + sachip->gc.owner = THIS_MODULE; + sachip->gc.get_direction = sa1111_gpio_get_direction; + sachip->gc.direction_input = sa1111_gpio_direction_input; + sachip->gc.direction_output = sa1111_gpio_direction_output; + sachip->gc.get = sa1111_gpio_get; + sachip->gc.set = sa1111_gpio_set; + sachip->gc.set_multiple = sa1111_gpio_set_multiple; + sachip->gc.to_irq = sa1111_gpio_to_irq; + sachip->gc.base = -1; + sachip->gc.ngpio = 18; + + return devm_gpiochip_add_data(sachip->dev, &sachip->gc, sachip); +} + /* * Bring the SA1111 out of reset. This requires a set procedure: * 1. nRESET asserted (by hardware) @@ -607,7 +691,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac, static void sa1111_dev_release(struct device *_dev) { - struct sa1111_dev *dev = SA1111_DEV(_dev); + struct sa1111_dev *dev = to_sa1111_device(_dev); kfree(dev); } @@ -696,19 +780,17 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) if (!pd) return -EINVAL; - sachip = kzalloc(sizeof(struct sa1111), GFP_KERNEL); + sachip = devm_kzalloc(me, sizeof(struct sa1111), GFP_KERNEL); if (!sachip) return -ENOMEM; - sachip->clk = clk_get(me, "SA1111_CLK"); - if (IS_ERR(sachip->clk)) { - ret = PTR_ERR(sachip->clk); - goto err_free; - } + sachip->clk = devm_clk_get(me, "SA1111_CLK"); + if (IS_ERR(sachip->clk)) + return PTR_ERR(sachip->clk); ret = clk_prepare(sachip->clk); if (ret) - goto err_clkput; + return ret; spin_lock_init(&sachip->lock); @@ -757,6 +839,11 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) goto err_clk; } + /* Setup the GPIOs - should really be done after the IRQ setup */ + ret = sa1111_setup_gpios(sachip); + if (ret) + goto err_irq; + #ifdef CONFIG_ARCH_SA1100 { unsigned int val; @@ -799,22 +886,22 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) return 0; + err_irq: + 
sa1111_remove_irq(sachip); err_clk: clk_disable(sachip->clk); err_unmap: iounmap(sachip->base); err_clk_unprep: clk_unprepare(sachip->clk); - err_clkput: - clk_put(sachip->clk); - err_free: - kfree(sachip); return ret; } static int sa1111_remove_one(struct device *dev, void *data) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); + if (dev->bus != &sa1111_bus_type) + return 0; device_del(&sadev->dev); release_resource(&sadev->res); put_device(&sadev->dev); @@ -823,29 +910,14 @@ static int sa1111_remove_one(struct device *dev, void *data) static void __sa1111_remove(struct sa1111 *sachip) { - void __iomem *irqbase = sachip->base + SA1111_INTC; - device_for_each_child(sachip->dev, NULL, sa1111_remove_one); - /* disable all IRQs */ - sa1111_writel(0, irqbase + SA1111_INTEN0); - sa1111_writel(0, irqbase + SA1111_INTEN1); - sa1111_writel(0, irqbase + SA1111_WAKEEN0); - sa1111_writel(0, irqbase + SA1111_WAKEEN1); + sa1111_remove_irq(sachip); clk_disable(sachip->clk); clk_unprepare(sachip->clk); - if (sachip->irq != NO_IRQ) { - irq_set_chained_handler_and_data(sachip->irq, NULL, NULL); - irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); - - release_mem_region(sachip->phys + SA1111_INTC, 512); - } - iounmap(sachip->base); - clk_put(sachip->clk); - kfree(sachip); } struct sa1111_save_data { @@ -1285,6 +1357,14 @@ void sa1111_disable_device(struct sa1111_dev *sadev) } EXPORT_SYMBOL(sa1111_disable_device); +int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num) +{ + if (num >= ARRAY_SIZE(sadev->irq)) + return -EINVAL; + return sadev->irq[num]; +} +EXPORT_SYMBOL_GPL(sa1111_get_irq); + /* * SA1111 "Register Access Bus." 
* @@ -1293,7 +1373,7 @@ EXPORT_SYMBOL(sa1111_disable_device); */ static int sa1111_match(struct device *_dev, struct device_driver *_drv) { - struct sa1111_dev *dev = SA1111_DEV(_dev); + struct sa1111_dev *dev = to_sa1111_device(_dev); struct sa1111_driver *drv = SA1111_DRV(_drv); return !!(dev->devid & drv->devid); @@ -1301,7 +1381,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv) static int sa1111_bus_suspend(struct device *dev, pm_message_t state) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1312,7 +1392,7 @@ static int sa1111_bus_suspend(struct device *dev, pm_message_t state) static int sa1111_bus_resume(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1326,12 +1406,12 @@ static void sa1111_bus_shutdown(struct device *dev) struct sa1111_driver *drv = SA1111_DRV(dev->driver); if (drv && drv->shutdown) - drv->shutdown(SA1111_DEV(dev)); + drv->shutdown(to_sa1111_device(dev)); } static int sa1111_bus_probe(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = -ENODEV; @@ -1342,7 +1422,7 @@ static int sa1111_bus_probe(struct device *dev) static int sa1111_bus_remove(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1407,7 +1487,7 @@ static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size) static int sa1111_notifier_call(struct notifier_block *n, unsigned long action, void *data) { - struct sa1111_dev *dev = SA1111_DEV(data); + struct sa1111_dev *dev = to_sa1111_device(data); switch 
(action) { case BUS_NOTIFY_ADD_DEVICE: diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4eaea2173bf8..68b06f9c65de 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -159,7 +159,11 @@ .endm .macro save_and_disable_irqs_notrace, oldcpsr +#ifdef CONFIG_CPU_V7M + mrs \oldcpsr, primask +#else mrs \oldcpsr, cpsr +#endif disable_irq_notrace .endm diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 9156fc303afd..bdd283bc5842 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -501,21 +501,4 @@ static inline void set_kernel_text_ro(void) { } void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); -/** - * secure_flush_area - ensure coherency across the secure boundary - * @addr: virtual address - * @size: size of region - * - * Ensure that the specified area of memory is coherent across the secure - * boundary from the non-secure side. This is used when calling secure - * firmware where the secure firmware does not ensure coherency. 
- */ -static inline void secure_flush_area(const void *addr, size_t size) -{ - phys_addr_t phys = __pa(addr); - - __cpuc_flush_dcache_area((void *)addr, size); - outer_flush_range(phys, phys + size); -} - #endif diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h index 7ea78144ae22..01509ae0bbec 100644 --- a/arch/arm/include/asm/cachetype.h +++ b/arch/arm/include/asm/cachetype.h @@ -56,4 +56,43 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask) (~__CACHEID_NEVER & __CACHEID_ARCH_MIN & mask & cacheid); } +#define CSSELR_ICACHE 1 +#define CSSELR_DCACHE 0 + +#define CSSELR_L1 (0 << 1) +#define CSSELR_L2 (1 << 1) +#define CSSELR_L3 (2 << 1) +#define CSSELR_L4 (3 << 1) +#define CSSELR_L5 (4 << 1) +#define CSSELR_L6 (5 << 1) +#define CSSELR_L7 (6 << 1) + +#ifndef CONFIG_CPU_V7M +static inline void set_csselr(unsigned int cache_selector) +{ + asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector)); +} + +static inline unsigned int read_ccsidr(void) +{ + unsigned int val; + + asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val)); + return val; +} +#else /* CONFIG_CPU_V7M */ +#include <linux/io.h> +#include "asm/v7m.h" + +static inline void set_csselr(unsigned int cache_selector) +{ + writel(cache_selector, BASEADDR_V7M_SCB + V7M_SCB_CTR); +} + +static inline unsigned int read_ccsidr(void) +{ + return readl(BASEADDR_V7M_SCB + V7M_SCB_CCSIDR); +} +#endif + #endif diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 1ee94c716a7f..754f86f667d4 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -60,6 +60,7 @@ ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_DEC 0x44 #define ARM_CPU_IMP_INTEL 0x69 /* ARM implemented processors */ @@ -76,6 +77,17 @@ #define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_PART_MASK 0xff00fff0 +/* DEC implemented cores */ +#define ARM_CPU_PART_SA1100 
0x4400a110 + +/* Intel implemented cores */ +#define ARM_CPU_PART_SA1110 0x6900b110 +#define ARM_CPU_REV_SA1110_A0 0 +#define ARM_CPU_REV_SA1110_B0 4 +#define ARM_CPU_REV_SA1110_B1 5 +#define ARM_CPU_REV_SA1110_B2 6 +#define ARM_CPU_REV_SA1110_B4 8 + #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 #define ARM_CPU_XSCALE_ARCH_V2 0x4000 @@ -152,6 +164,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return read_cpuid(CPUID_ID); } +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) +{ + return read_cpuid(CPUID_CACHETYPE); +} + #elif defined(CONFIG_CPU_V7M) static inline unsigned int __attribute_const__ read_cpuid_id(void) @@ -159,6 +176,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID); } +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) +{ + return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR); +} + #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */ static inline unsigned int __attribute_const__ read_cpuid_id(void) @@ -173,6 +195,11 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } +static inline unsigned int __attribute_const__ read_cpuid_revision(void) +{ + return read_cpuid_id() & 0x0000000f; +} + /* * The CPU part number is meaningless without referring to the CPU * implementer: implementers are free to define their own part numbers @@ -193,11 +220,6 @@ static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } -static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) -{ - return read_cpuid(CPUID_CACHETYPE); -} - static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void) { return read_cpuid(CPUID_TCM); @@ -208,6 +230,10 @@ static inline unsigned int __attribute_const__ read_cpuid_mpidr(void) return 
read_cpuid(CPUID_MPIDR); } +/* StrongARM-11x0 CPUs */ +#define cpu_is_sa1100() (read_cpuid_part() == ARM_CPU_PART_SA1100) +#define cpu_is_sa1110() (read_cpuid_part() == ARM_CPU_PART_SA1110) + /* * Intel's XScale3 core supports some v6 features (supersections, L2) * but advertises itself as v5 as it does not support the v6 ISA. For diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h index e847d23351ed..acf1d14b89a6 100644 --- a/arch/arm/include/asm/flat.h +++ b/arch/arm/include/asm/flat.h @@ -8,8 +8,9 @@ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_get_addr_from_rp(rp, relval, flags, persistent) ((void)persistent,get_unaligned(rp)) -#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val,rp) +#define flat_get_addr_from_rp(rp, relval, flags, persistent) \ + ({ unsigned long __val; __get_user_unaligned(__val, rp); __val; }) +#define flat_put_addr_at_rp(rp, val, relval) __put_user_unaligned(val, rp) #define flat_get_relocate_addr(rel) (rel) #define flat_set_persistent(relval, p) 0 diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index cab07f69382d..01c3d92624e5 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -118,11 +118,7 @@ #endif #if defined(CONFIG_CPU_V7M) -# ifdef _CACHE # define MULTI_CACHE 1 -# else -# define _CACHE nop -# endif #endif #if !defined(_CACHE) && !defined(MULTI_CACHE) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 3a5ec1c25659..736292b42fca 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -87,6 +87,15 @@ #define L310_CACHE_ID_RTL_R3P2 0x08 #define L310_CACHE_ID_RTL_R3P3 0x09 +#define L2X0_EVENT_CNT_CTRL_ENABLE BIT(0) + +#define L2X0_EVENT_CNT_CFG_SRC_SHIFT 2 +#define L2X0_EVENT_CNT_CFG_SRC_MASK 0xf +#define 
L2X0_EVENT_CNT_CFG_SRC_DISABLED 0 +#define L2X0_EVENT_CNT_CFG_INT_DISABLED 0 +#define L2X0_EVENT_CNT_CFG_INT_INCR 1 +#define L2X0_EVENT_CNT_CFG_INT_OVERFLOW 2 + /* L2C auxiliary control register - bits common to L2C-210/220/310 */ #define L2C_AUX_CTRL_WAY_SIZE_SHIFT 17 #define L2C_AUX_CTRL_WAY_SIZE_MASK (7 << 17) @@ -157,6 +166,16 @@ static inline int l2x0_of_init(u32 aux_val, u32 aux_mask) } #endif +#ifdef CONFIG_CACHE_L2X0_PMU +void l2x0_pmu_register(void __iomem *base, u32 part); +void l2x0_pmu_suspend(void); +void l2x0_pmu_resume(void); +#else +static inline void l2x0_pmu_register(void __iomem *base, u32 part) {} +static inline void l2x0_pmu_suspend(void) {} +static inline void l2x0_pmu_resume(void) {} +#endif + struct l2x0_regs { unsigned long phy_base; unsigned long aux_ctrl; diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h index 7c2bbc7f0be1..8979fa3bbf2d 100644 --- a/arch/arm/include/asm/hardware/sa1111.h +++ b/arch/arm/include/asm/hardware/sa1111.h @@ -420,7 +420,7 @@ struct sa1111_dev { u64 dma_mask; }; -#define SA1111_DEV(_d) container_of((_d), struct sa1111_dev, dev) +#define to_sa1111_device(x) container_of(x, struct sa1111_dev, dev) #define sa1111_get_drvdata(d) dev_get_drvdata(&(d)->dev) #define sa1111_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, p) @@ -446,6 +446,8 @@ struct sa1111_driver { int sa1111_enable_device(struct sa1111_dev *); void sa1111_disable_device(struct sa1111_dev *); +int sa1111_get_irq(struct sa1111_dev *, unsigned num); + unsigned int sa1111_pll_clock(struct sa1111_dev *); #define SA1111_AUDIO_ACLINK 0 diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 8e427c7b4425..afcaf8bf971b 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -114,7 +114,6 @@ struct notifier_block; struct perf_event; struct pmu; -extern struct pmu perf_ops_bp; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, 
int *gen_len, int *gen_type); extern int arch_check_bp_in_kernelspace(struct perf_event *bp); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 31c07a2cc100..76cbd9c674df 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -159,13 +159,8 @@ * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. */ -#if defined(__virt_to_phys) -#define PHYS_OFFSET PLAT_PHYS_OFFSET -#define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) - -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT) +#if defined(CONFIG_ARM_PATCH_PHYS_VIRT) /* * Constants used to force the right instruction encodings and shifts @@ -182,10 +177,6 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define virt_to_pfn(kaddr) \ - ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ - PHYS_PFN_OFFSET) - #define __pv_stub(from,to,instr,type) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ @@ -257,12 +248,12 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) return x - PHYS_OFFSET + PAGE_OFFSET; } +#endif + #define virt_to_pfn(kaddr) \ ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ PHYS_PFN_OFFSET) -#endif - /* * These are *only* valid on the kernel direct mapped RAM memory. * Note: Drivers should NOT use these. 
They are the wrong diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index e358b7966c06..464748b9fd7d 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -23,10 +23,8 @@ struct mod_arch_specific { struct unwind_table *unwind[ARM_SEC_MAX]; #endif #ifdef CONFIG_ARM_MODULE_PLTS - struct elf32_shdr *core_plt; - struct elf32_shdr *init_plt; - int core_plt_count; - int init_plt_count; + struct elf32_shdr *plt; + int plt_count; #endif }; diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h index 615781c61627..1fd775c1bc5d 100644 --- a/arch/arm/include/asm/v7m.h +++ b/arch/arm/include/asm/v7m.h @@ -24,6 +24,9 @@ #define V7M_SCB_CCR 0x14 #define V7M_SCB_CCR_STKALIGN (1 << 9) +#define V7M_SCB_CCR_DC (1 << 16) +#define V7M_SCB_CCR_IC (1 << 17) +#define V7M_SCB_CCR_BP (1 << 18) #define V7M_SCB_SHPR2 0x1c #define V7M_SCB_SHPR3 0x20 @@ -47,6 +50,25 @@ #define EXC_RET_STACK_MASK 0x00000004 #define EXC_RET_THREADMODE_PROCESSSTACK 0xfffffffd +/* Cache related definitions */ + +#define V7M_SCB_CLIDR 0x78 /* Cache Level ID register */ +#define V7M_SCB_CTR 0x7c /* Cache Type register */ +#define V7M_SCB_CCSIDR 0x80 /* Cache size ID register */ +#define V7M_SCB_CSSELR 0x84 /* Cache size selection register */ + +/* Cache operations */ +#define V7M_SCB_ICIALLU 0x250 /* I-cache invalidate all to PoU */ +#define V7M_SCB_ICIMVAU 0x258 /* I-cache invalidate by MVA to PoU */ +#define V7M_SCB_DCIMVAC 0x25c /* D-cache invalidate by MVA to PoC */ +#define V7M_SCB_DCISW 0x260 /* D-cache invalidate by set-way */ +#define V7M_SCB_DCCMVAU 0x264 /* D-cache clean by MVA to PoU */ +#define V7M_SCB_DCCMVAC 0x268 /* D-cache clean by MVA to PoC */ +#define V7M_SCB_DCCSW 0x26c /* D-cache clean by set-way */ +#define V7M_SCB_DCCIMVAC 0x270 /* D-cache clean and invalidate by MVA to PoC */ +#define V7M_SCB_DCCISW 0x274 /* D-cache clean and invalidate by set-way */ +#define V7M_SCB_BPIALL 0x278 /* Branch predictor invalidate all */ 
+ #ifndef __ASSEMBLY__ enum reboot_mode; diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 7dccc964d75f..a3308ad1a024 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -19,7 +19,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[]; static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel __used __section(__cpuidle_method_of_table_end); -static struct cpuidle_ops cpuidle_ops[NR_CPUS]; +static struct cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init; /** * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle() diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fb1a69eb49c1..6b4eb27b8758 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -158,7 +158,21 @@ __after_proc_init: bic r0, r0, #CR_V #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg -#endif /* CONFIG_CPU_CP15 */ +#elif defined (CONFIG_CPU_V7M) + /* For V7M systems we want to modify the CCR similarly to the SCTLR */ +#ifdef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #V7M_SCB_CCR_DC +#endif +#ifdef CONFIG_CPU_BPREDICT_DISABLE + bic r0, r0, #V7M_SCB_CCR_BP +#endif +#ifdef CONFIG_CPU_ICACHE_DISABLE + bic r0, r0, #V7M_SCB_CCR_IC +#endif + movw r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + movt r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + str r0, [r3] +#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 0c7efc3446c0..3a5cba90c971 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -9,6 +9,7 @@ #include <linux/elf.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/sort.h> #include <asm/cache.h> #include <asm/opcodes.h> @@ -30,154 +31,198 @@ struct plt_entries { u32 lit[PLT_ENT_COUNT]; }; -static bool in_init(const struct module *mod, u32 addr) +u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr 
val) { - return addr - (u32)mod->init_layout.base < mod->init_layout.size; + struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr; + int idx = 0; + + /* + * Look for an existing entry pointing to 'val'. Given that the + * relocations are sorted, this will be the last entry we allocated. + * (if one exists). + */ + if (mod->arch.plt_count > 0) { + plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT; + idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT; + + if (plt->lit[idx] == val) + return (u32)&plt->ldr[idx]; + + idx = (idx + 1) % PLT_ENT_COUNT; + if (!idx) + plt++; + } + + mod->arch.plt_count++; + BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size); + + if (!idx) + /* Populate a new set of entries */ + *plt = (struct plt_entries){ + { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, + { val, } + }; + else + plt->lit[idx] = val; + + return (u32)&plt->ldr[idx]; } -u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) +#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rel(const void *a, const void *b) { - struct plt_entries *plt, *plt_end; - int c, *count; - - if (in_init(mod, loc)) { - plt = (void *)mod->arch.init_plt->sh_addr; - plt_end = (void *)plt + mod->arch.init_plt->sh_size; - count = &mod->arch.init_plt_count; - } else { - plt = (void *)mod->arch.core_plt->sh_addr; - plt_end = (void *)plt + mod->arch.core_plt->sh_size; - count = &mod->arch.core_plt_count; - } + const Elf32_Rel *x = a, *y = b; + int i; - /* Look for an existing entry pointing to 'val' */ - for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { - int i; - - if (!c) { - /* Populate a new set of entries */ - *plt = (struct plt_entries){ - { [0 ... 
PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, - { val, } - }; - ++*count; - return (u32)plt->ldr; - } - for (i = 0; i < PLT_ENT_COUNT; i++) { - if (!plt->lit[i]) { - plt->lit[i] = val; - ++*count; - } - if (plt->lit[i] == val) - return (u32)&plt->ldr[i]; - } + /* sort by type and symbol index */ + i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info)); + if (i == 0) + i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info)); + return i; +} + +static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel) +{ + u32 *tval = (u32 *)(base + rel->r_offset); + + /* + * Do a bitwise compare on the raw addend rather than fully decoding + * the offset and doing an arithmetic comparison. + * Note that a zero-addend jump/call relocation is encoded taking the + * PC bias into account, i.e., -8 for ARM and -4 for Thumb2. + */ + switch (ELF32_R_TYPE(rel->r_info)) { + u16 upper, lower; + + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]); + lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]); + + return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe; + + case R_ARM_CALL: + case R_ARM_PC24: + case R_ARM_JUMP24: + return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe; } BUG(); } -static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, - u32 mask) +static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num) { - u32 *loc1, *loc2; - int i; + const Elf32_Rel *prev; - for (i = 0; i < num; i++) { - if (rel[i].r_info != rel[num].r_info) - continue; + /* + * Entries are sorted by type and symbol index. That means that, + * if a duplicate entry exists, it must be in the preceding + * slot. + */ + if (!num) + return false; - /* - * Identical relocation types against identical symbols can - * still result in different PLT entries if the addend in the - * place is different. So resolve the target of the relocation - * to compare the values. 
- */ - loc1 = (u32 *)(base + rel[i].r_offset); - loc2 = (u32 *)(base + rel[num].r_offset); - if (((*loc1 ^ *loc2) & mask) == 0) - return 1; - } - return 0; + prev = rel + num - 1; + return cmp_rel(rel + num, prev) == 0 && + is_zero_addend_relocation(base, prev); } /* Count how many PLT entries we may need */ -static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) +static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, + const Elf32_Rel *rel, int num) { unsigned int ret = 0; + const Elf32_Sym *s; int i; - /* - * Sure, this is order(n^2), but it's usually short, and not - * time critical - */ - for (i = 0; i < num; i++) + for (i = 0; i < num; i++) { switch (ELF32_R_TYPE(rel[i].r_info)) { case R_ARM_CALL: case R_ARM_PC24: case R_ARM_JUMP24: - if (!duplicate_rel(base, rel, i, - __opcode_to_mem_arm(0x00ffffff))) - ret++; - break; -#ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: - if (!duplicate_rel(base, rel, i, - __opcode_to_mem_thumb32(0x07ff2fff))) + /* + * We only have to consider branch targets that resolve + * to undefined symbols. This is not simply a heuristic, + * it is a fundamental limitation, since the PLT itself + * is part of the module, and needs to be within range + * as well, so modules can never grow beyond that limit. + */ + s = syms + ELF32_R_SYM(rel[i].r_info); + if (s->st_shndx != SHN_UNDEF) + break; + + /* + * Jump relocations with non-zero addends against + * undefined symbols are supported by the ELF spec, but + * do not occur in practice (e.g., 'jump n bytes past + * the entry point of undefined function symbol f'). + * So we need to support them, but there is no need to + * take them into consideration when trying to optimize + * this code. So let's only check for duplicates when + * the addend is zero. 
+ */ + if (!is_zero_addend_relocation(base, rel + i) || + !duplicate_rel(base, rel, i)) ret++; -#endif } + } return ret; } int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long core_plts = 0, init_plts = 0; + unsigned long plts = 0; Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + Elf32_Sym *syms = NULL; /* * To store the PLTs, we expand the .text section for core module code - * and the .init.text section for initialization code. + * and for initialization code. */ - for (s = sechdrs; s < sechdrs_end; ++s) - if (strcmp(".core.plt", secstrings + s->sh_name) == 0) - mod->arch.core_plt = s; - else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) - mod->arch.init_plt = s; - - if (!mod->arch.core_plt || !mod->arch.init_plt) { - pr_err("%s: sections missing\n", mod->name); + for (s = sechdrs; s < sechdrs_end; ++s) { + if (strcmp(".plt", secstrings + s->sh_name) == 0) + mod->arch.plt = s; + else if (s->sh_type == SHT_SYMTAB) + syms = (Elf32_Sym *)s->sh_addr; + } + + if (!mod->arch.plt) { + pr_err("%s: module PLT section missing\n", mod->name); + return -ENOEXEC; + } + if (!syms) { + pr_err("%s: module symtab section missing\n", mod->name); return -ENOEXEC; } for (s = sechdrs + 1; s < sechdrs_end; ++s) { - const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; + Elf32_Rel *rels = (void *)ehdr + s->sh_offset; int numrels = s->sh_size / sizeof(Elf32_Rel); Elf32_Shdr *dstsec = sechdrs + s->sh_info; if (s->sh_type != SHT_REL) continue; - if (strstr(secstrings + s->sh_name, ".init")) - init_plts += count_plts(dstsec->sh_addr, rels, numrels); - else - core_plts += count_plts(dstsec->sh_addr, rels, numrels); + /* ignore relocations that operate on non-exec sections */ + if (!(dstsec->sh_flags & SHF_EXECINSTR)) + continue; + + /* sort by type and symbol index */ + sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL); + + plts += count_plts(syms, dstsec->sh_addr, rels, numrels); } - 
mod->arch.core_plt->sh_type = SHT_NOBITS; - mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, - sizeof(struct plt_entries)); - mod->arch.core_plt_count = 0; - - mod->arch.init_plt->sh_type = SHT_NOBITS; - mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, - sizeof(struct plt_entries)); - mod->arch.init_plt_count = 0; - pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, - mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + mod->arch.plt->sh_type = SHT_NOBITS; + mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.plt_count = 0; + + pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size); return 0; } diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds index 3682fa107918..05881e2b414c 100644 --- a/arch/arm/kernel/module.lds +++ b/arch/arm/kernel/module.lds @@ -1,4 +1,3 @@ SECTIONS { - .core.plt : { BYTE(0) } - .init.plt : { BYTE(0) } + .plt : { BYTE(0) } } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index df7f2a75e769..34e3f3c45634 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -114,19 +114,19 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU -struct processor processor __read_mostly; +struct processor processor __ro_after_init; #endif #ifdef MULTI_TLB -struct cpu_tlb_fns cpu_tlb __read_mostly; +struct cpu_tlb_fns cpu_tlb __ro_after_init; #endif #ifdef MULTI_USER -struct cpu_user_fns cpu_user __read_mostly; +struct cpu_user_fns cpu_user __ro_after_init; #endif #ifdef MULTI_CACHE -struct cpu_cache_fns cpu_cache __read_mostly; +struct cpu_cache_fns cpu_cache __ro_after_init; #endif #ifdef 
CONFIG_OUTER_CACHE -struct outer_cache_fns outer_cache __read_mostly; +struct outer_cache_fns outer_cache __ro_after_init; EXPORT_SYMBOL(outer_cache); #endif @@ -290,12 +290,9 @@ static int cpu_has_aliasing_icache(unsigned int arch) /* arch specifies the register format */ switch (arch) { case CPU_ARCH_ARMv7: - asm("mcr p15, 2, %0, c0, c0, 0 @ set CSSELR" - : /* No output operands */ - : "r" (1)); + set_csselr(CSSELR_ICACHE | CSSELR_L1); isb(); - asm("mrc p15, 1, %0, c0, c0, 0 @ read CCSIDR" - : "=r" (id_reg)); + id_reg = read_ccsidr(); line_size = 4 << ((id_reg & 0x7) + 2); num_sets = ((id_reg >> 13) & 0x7fff) + 1; aliasing_icache = (line_size * num_sets) > PAGE_SIZE; @@ -315,11 +312,12 @@ static void __init cacheid_init(void) { unsigned int arch = cpu_architecture(); - if (arch == CPU_ARCH_ARMv7M) { - cacheid = 0; - } else if (arch >= CPU_ARCH_ARMv6) { + if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); - if ((cachetype & (7 << 29)) == 4 << 29) { + + if ((arch == CPU_ARCH_ARMv7M) && !cachetype) { + cacheid = 0; + } else if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 861521606c6d..937c8920d741 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -82,7 +82,7 @@ enum ipi_msg_type { static DECLARE_COMPLETION(cpu_running); -static struct smp_operations smp_ops; +static struct smp_operations smp_ops __ro_after_init; void __init smp_set_ops(const struct smp_operations *ops) { diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index a0affd14086a..53cf86cf2d1a 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include <linux/cache.h> #include <linux/elf.h> #include <linux/err.h> #include <linux/kernel.h> @@ -39,7 +40,7 @@ static struct page **vdso_text_pagelist; /* Total number of pages needed for the data and text portions of the VDSO. */ -unsigned int vdso_total_pages __read_mostly; +unsigned int vdso_total_pages __ro_after_init; /* * The VDSO data page. @@ -47,13 +48,13 @@ unsigned int vdso_total_pages __read_mostly; static union vdso_data_store vdso_data_store __page_aligned_data; static struct vdso_data *vdso_data = &vdso_data_store.data; -static struct page *vdso_data_page; -static struct vm_special_mapping vdso_data_mapping = { +static struct page *vdso_data_page __ro_after_init; +static const struct vm_special_mapping vdso_data_mapping = { .name = "[vvar]", .pages = &vdso_data_page, }; -static struct vm_special_mapping vdso_text_mapping = { +static struct vm_special_mapping vdso_text_mapping __ro_after_init = { .name = "[vdso]", }; @@ -67,7 +68,7 @@ struct elfinfo { /* Cached result of boot-time check for whether the arch timer exists, * and if so, whether the virtual counter is useable. */ -static bool cntvct_ok __read_mostly; +static bool cntvct_ok __ro_after_init; static bool __init cntvct_functional(void) { diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 8044591dca72..2cef11884857 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -29,7 +29,7 @@ /* * Default to the loop-based delay implementation. 
*/ -struct arm_delay_ops arm_delay_ops = { +struct arm_delay_ops arm_delay_ops __ro_after_init = { .delay = __loop_delay, .const_udelay = __loop_const_udelay, .udelay = __loop_udelay, diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h index 02f6d7a706b1..20d5ad781fe2 100644 --- a/arch/arm/mach-footbridge/include/mach/hardware.h +++ b/arch/arm/mach-footbridge/include/mach/hardware.h @@ -59,7 +59,7 @@ #define XBUS_SWITCH_J17_11 ((*XBUS_SWITCH) & (1 << 5)) #define XBUS_SWITCH_J17_9 ((*XBUS_SWITCH) & (1 << 6)) -#define UNCACHEABLE_ADDR (ARMCSR_BASE + 0x108) +#define UNCACHEABLE_ADDR (ARMCSR_BASE + 0x108) /* CSR_ROMBASEMASK */ /* PIC irq control */ diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h index 257166b21f3d..aa79fa47373a 100644 --- a/arch/arm/mach-rpc/include/mach/hardware.h +++ b/arch/arm/mach-rpc/include/mach/hardware.h @@ -40,7 +40,7 @@ #define SCREEN_END 0xdfc00000 #define SCREEN_BASE 0xdf800000 -#define UNCACHEABLE_ADDR 0xdf010000 +#define UNCACHEABLE_ADDR (FLUSH_BASE + 0x10000) /* * IO Addresses diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h index cbedd75a9d65..d944fd7e464f 100644 --- a/arch/arm/mach-sa1100/include/mach/hardware.h +++ b/arch/arm/mach-sa1100/include/mach/hardware.h @@ -13,7 +13,7 @@ #define __ASM_ARCH_HARDWARE_H -#define UNCACHEABLE_ADDR 0xfa050000 +#define UNCACHEABLE_ADDR 0xfa050000 /* ICIP */ /* @@ -36,28 +36,10 @@ #define io_v2p( x ) \ ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))<<VIO_SHIFT)) + PIO_START ) -#define CPU_SA1110_A0 (0) -#define CPU_SA1110_B0 (4) -#define CPU_SA1110_B1 (5) -#define CPU_SA1110_B2 (6) -#define CPU_SA1110_B4 (8) - -#define CPU_SA1100_ID (0x4401a110) -#define CPU_SA1100_MASK (0xfffffff0) -#define CPU_SA1110_ID (0x6901b110) -#define CPU_SA1110_MASK (0xfffffff0) - #define __MREG(x) IOMEM(io_p2v(x)) #ifndef __ASSEMBLY__ -#include 
<asm/cputype.h> - -#define CPU_REVISION (read_cpuid_id() & 15) - -#define cpu_is_sa1100() ((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID) -#define cpu_is_sa1110() ((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID) - # define __REG(x) (*((volatile unsigned long __iomem *)io_p2v(x))) # define __PREG(x) (io_v2p((unsigned long)&(x))) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d15a7fe51618..c1799dd1d0d9 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -403,6 +403,7 @@ config CPU_V7M bool select CPU_32v7M select CPU_ABRT_NOMMU + select CPU_CACHE_V7M select CPU_CACHE_NOP select CPU_PABRT_LEGACY select CPU_THUMBONLY @@ -518,6 +519,9 @@ config CPU_CACHE_VIPT config CPU_CACHE_FA bool +config CPU_CACHE_V7M + bool + if MMU # The copy-page model config CPU_COPY_V4WT @@ -750,14 +754,14 @@ config CPU_HIGH_VECTOR config CPU_ICACHE_DISABLE bool "Disable I-Cache (I-bit)" - depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) + depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M help Say Y here to disable the processor instruction cache. Unless you have a reason not to or are unsure, say N. config CPU_DCACHE_DISABLE bool "Disable D-Cache (C-bit)" - depends on CPU_CP15 && !SMP + depends on (CPU_CP15 && !SMP) || CPU_V7M help Say Y here to disable the processor data cache. Unless you have a reason not to or are unsure, say N. @@ -792,7 +796,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M help Say Y here to disable branch prediction. If unsure, say N. @@ -916,6 +920,13 @@ config CACHE_L2X0 help This option enables the L2x0 PrimeCell. 
+config CACHE_L2X0_PMU + bool "L2x0 performance monitor support" if CACHE_L2X0 + depends on PERF_EVENTS + help + This option enables support for the performance monitoring features + of the L220 and PL310 outer cache controllers. + if CACHE_L2X0 config PL310_ERRATA_588369 diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7f76d96ce546..e8698241ece9 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -43,9 +43,11 @@ obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o +obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o AFLAGS_cache-v6.o :=-Wa,-march=armv6 AFLAGS_cache-v7.o :=-Wa,-march=armv7-a +AFLAGS_cache-v7m.o :=-Wa,-march=armv7-m obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o @@ -101,6 +103,7 @@ AFLAGS_proc-v7.o :=-Wa,-march=armv7-a obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o +obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c new file mode 100644 index 000000000000..976d3057272e --- /dev/null +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -0,0 +1,584 @@ +/* + * L220/L310 cache controller support + * + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/errno.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/perf_event.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include <asm/hardware/cache-l2x0.h> + +#define PMU_NR_COUNTERS 2 + +static void __iomem *l2x0_base; +static struct pmu *l2x0_pmu; +static cpumask_t pmu_cpu; + +static const char *l2x0_name; + +static ktime_t l2x0_pmu_poll_period; +static struct hrtimer l2x0_pmu_hrtimer; + +/* + * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0. + * Registers controlling these are laid out in pairs, in descending order, i.e. + * the register for Counter1 comes first, followed by the register for + * Counter0. + * We ensure that idx 0 -> Counter0, and idx1 -> Counter1. + */ +static struct perf_event *events[PMU_NR_COUNTERS]; + +/* Find an unused counter */ +static int l2x0_pmu_find_idx(void) +{ + int i; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (!events[i]) + return i; + } + + return -1; +} + +/* How many counters are allocated? 
*/ +static int l2x0_pmu_num_active_counters(void) +{ + int i, cnt = 0; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + cnt++; + } + + return cnt; +} + +static void l2x0_pmu_counter_config_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx); +} + +static u32 l2x0_pmu_counter_read(int idx) +{ + return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void l2x0_pmu_counter_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void __l2x0_pmu_enable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val |= L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void __l2x0_pmu_disable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val &= ~L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void l2x0_pmu_enable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_enable(); +} + +static void l2x0_pmu_disable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_disable(); +} + +static void warn_if_saturated(u32 count) +{ + if (count != 0xffffffff) + return; + + pr_warn_ratelimited("L2X0 counter saturated. 
Poll period too long\n"); +} + +static void l2x0_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + u64 prev_count, new_count, mask; + + do { + prev_count = local64_read(&hw->prev_count); + new_count = l2x0_pmu_counter_read(hw->idx); + } while (local64_xchg(&hw->prev_count, new_count) != prev_count); + + mask = GENMASK_ULL(31, 0); + local64_add((new_count - prev_count) & mask, &event->count); + + warn_if_saturated(new_count); +} + +static void l2x0_pmu_event_configure(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + /* + * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we + * will *always* lose some number of events when a counter saturates, + * and have no way of detecting how many were lost. + * + * To minimize the impact of this, we try to maximize the period by + * always starting counters at zero. To ensure that group ratios are + * representative, we poll periodically to avoid counters saturating. + * See l2x0_pmu_poll(). 
+ */ + local64_set(&hw->prev_count, 0); + l2x0_pmu_counter_write(hw->idx, 0); +} + +static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + struct perf_event *event = events[i]; + + if (!event) + continue; + + l2x0_pmu_event_read(event); + l2x0_pmu_event_configure(event); + } + + __l2x0_pmu_enable(); + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period); + return HRTIMER_RESTART; +} + + +static void __l2x0_pmu_event_enable(int idx, u32 event) +{ + u32 val; + + val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE)); + l2x0_pmu_event_configure(event); + } + + hw->state = 0; + + __l2x0_pmu_event_enable(hw->idx, hw->config_base); +} + +static void __l2x0_pmu_event_disable(int idx) +{ + u32 val; + + val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED)) + return; + + __l2x0_pmu_event_disable(hw->idx); + + hw->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_UPDATE) { + l2x0_pmu_event_read(event); + hw->state |= PERF_HES_UPTODATE; + } +} + +static int l2x0_pmu_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + int idx = l2x0_pmu_find_idx(); + + if (idx == -1) + return -EAGAIN; + + /* + * Pin the timer, so that the overflows are handled by the 
chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). + */ + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period, + HRTIMER_MODE_REL_PINNED); + + events[idx] = event; + hw->idx = idx; + + l2x0_pmu_event_configure(event); + + hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + l2x0_pmu_event_start(event, 0); + + return 0; +} + +static void l2x0_pmu_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + l2x0_pmu_event_stop(event, PERF_EF_UPDATE); + + events[hw->idx] = NULL; + hw->idx = -1; + + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_cancel(&l2x0_pmu_hrtimer); +} + +static bool l2x0_pmu_group_is_valid(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + struct perf_event *leader = event->group_leader; + struct perf_event *sibling; + int num_hw = 0; + + if (leader->pmu == pmu) + num_hw++; + else if (!is_software_event(leader)) + return false; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (sibling->pmu == pmu) + num_hw++; + else if (!is_software_event(sibling)) + return false; + } + + return num_hw <= PMU_NR_COUNTERS; +} + +static int l2x0_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + if (event->attr.type != l2x0_pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || + event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK) + return -EINVAL; + + hw->config_base = event->attr.config; + + if (!l2x0_pmu_group_is_valid(event)) + return -EINVAL; + + event->cpu = cpumask_first(&pmu_cpu); + + return 0; +} + +struct 
l2x0_event_attribute { + struct device_attribute attr; + unsigned int config; + bool pl310_only; +}; + +#define L2X0_EVENT_ATTR(_name, _config, _pl310_only) \ + (&((struct l2x0_event_attribute[]) {{ \ + .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL), \ + .config = _config, \ + .pl310_only = _pl310_only, \ + }})[0].attr.attr) + +#define L220_PLUS_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, false) + +#define PL310_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, true) + +static ssize_t l2x0_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr); + return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config); +} + +static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr.attr); + + if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0) + return attr->mode; + + return 0; +} + +static struct attribute *l2x0_pmu_event_attrs[] = { + L220_PLUS_EVENT_ATTR(co, 0x1), + L220_PLUS_EVENT_ATTR(drhit, 0x2), + L220_PLUS_EVENT_ATTR(drreq, 0x3), + L220_PLUS_EVENT_ATTR(dwhit, 0x4), + L220_PLUS_EVENT_ATTR(dwreq, 0x5), + L220_PLUS_EVENT_ATTR(dwtreq, 0x6), + L220_PLUS_EVENT_ATTR(irhit, 0x7), + L220_PLUS_EVENT_ATTR(irreq, 0x8), + L220_PLUS_EVENT_ATTR(wa, 0x9), + PL310_EVENT_ATTR(ipfalloc, 0xa), + PL310_EVENT_ATTR(epfhit, 0xb), + PL310_EVENT_ATTR(epfalloc, 0xc), + PL310_EVENT_ATTR(srrcvd, 0xd), + PL310_EVENT_ATTR(srconf, 0xe), + PL310_EVENT_ATTR(epfrcvd, 0xf), + NULL +}; + +static struct attribute_group l2x0_pmu_event_attrs_group = { + .name = "events", + .attrs = l2x0_pmu_event_attrs, + .is_visible = l2x0_pmu_event_attr_is_visible, +}; + +static ssize_t l2x0_pmu_cpumask_show(struct device *dev, + 
struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &pmu_cpu); +} + +static struct device_attribute l2x0_pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL); + +static struct attribute *l2x0_pmu_cpumask_attrs[] = { + &l2x0_pmu_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group l2x0_pmu_cpumask_attr_group = { + .attrs = l2x0_pmu_cpumask_attrs, +}; + +static const struct attribute_group *l2x0_pmu_attr_groups[] = { + &l2x0_pmu_event_attrs_group, + &l2x0_pmu_cpumask_attr_group, + NULL, +}; + +static void l2x0_pmu_reset(void) +{ + int i; + + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) + __l2x0_pmu_event_disable(i); +} + +static int l2x0_pmu_offline_cpu(unsigned int cpu) +{ + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(l2x0_pmu, cpu, target); + cpumask_set_cpu(target, &pmu_cpu); + + return 0; +} + +void l2x0_pmu_suspend(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_disable(l2x0_pmu); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE); + } + +} + +void l2x0_pmu_resume(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_reset(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_start(events[i], PERF_EF_RELOAD); + } + + l2x0_pmu_enable(l2x0_pmu); +} + +void __init l2x0_pmu_register(void __iomem *base, u32 part) +{ + /* + * Determine whether we support the PMU, and choose the name for sysfs. + * This is also used by l2x0_pmu_event_attr_is_visible to determine + * which events to display, as the PL310 PMU supports a superset of + * L220 events. + * + * The L210 PMU has a different programmer's interface, and is not + * supported by this driver. 
+ * + * We must defer registering the PMU until the perf subsystem is up and + * running, so just stash the name and base, and leave that to another + * initcall. + */ + switch (part & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L220: + l2x0_name = "l2c_220"; + break; + case L2X0_CACHE_ID_PART_L310: + l2x0_name = "l2c_310"; + break; + default: + return; + } + + l2x0_base = base; +} + +static __init int l2x0_pmu_init(void) +{ + int ret; + + if (!l2x0_base) + return 0; + + l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL); + if (!l2x0_pmu) { + pr_warn("Unable to allocate L2x0 PMU\n"); + return -ENOMEM; + } + + *l2x0_pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .pmu_enable = l2x0_pmu_enable, + .pmu_disable = l2x0_pmu_disable, + .read = l2x0_pmu_event_read, + .start = l2x0_pmu_event_start, + .stop = l2x0_pmu_event_stop, + .add = l2x0_pmu_event_add, + .del = l2x0_pmu_event_del, + .event_init = l2x0_pmu_event_init, + .attr_groups = l2x0_pmu_attr_groups, + }; + + l2x0_pmu_reset(); + + /* + * We always use a hrtimer rather than an interrupt. + * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll. + * + * Polling once a second allows the counters to fill up to 1/128th on a + * quad-core test chip with cores clocked at 400MHz. Hopefully this + * leaves sufficient headroom to avoid overflow on production silicon + * at higher frequencies. 
+ */ + l2x0_pmu_poll_period = ms_to_ktime(1000); + hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + l2x0_pmu_hrtimer.function = l2x0_pmu_poll; + + cpumask_set_cpu(0, &pmu_cpu); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE, + "AP_PERF_ARM_L2X0_ONLINE", NULL, + l2x0_pmu_offline_cpu); + if (ret) + goto out_pmu; + + ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1); + if (ret) + goto out_cpuhp; + + return 0; + +out_cpuhp: + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE); +out_pmu: + kfree(l2x0_pmu); + l2x0_pmu = NULL; + return ret; +} +device_initcall(l2x0_pmu_init); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index cc12905ae6f8..d1870c777c6e 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -142,6 +142,8 @@ static void l2c_disable(void) { void __iomem *base = l2x0_base; + l2x0_pmu_suspend(); + outer_cache.flush_all(); l2c_write_sec(0, base, L2X0_CTRL); dsb(st); @@ -159,6 +161,8 @@ static void l2c_resume(void) /* Do not touch the controller if already enabled. */ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) l2c_enable(base, l2x0_data->num_lock); + + l2x0_pmu_resume(); } /* @@ -709,9 +713,8 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id, if (revision >= L310_CACHE_ID_RTL_R3P0 && revision < L310_CACHE_ID_RTL_R3P2) { u32 val = l2x0_saved_regs.prefetch_ctrl; - /* I don't think bit23 is required here... 
but iMX6 does so */ - if (val & (BIT(30) | BIT(23))) { - val &= ~(BIT(30) | BIT(23)); + if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) { + val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL; l2x0_saved_regs.prefetch_ctrl = val; errata[n++] = "752271"; } @@ -892,6 +895,8 @@ static int __init __l2c_init(const struct l2c_init_data *data, pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", data->type, cache_id, aux); + l2x0_pmu_register(l2x0_base, cache_id); + return 0; } diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S new file mode 100644 index 000000000000..816a7e44e6f1 --- /dev/null +++ b/arch/arm/mm/cache-v7m.S @@ -0,0 +1,453 @@ +/* + * linux/arch/arm/mm/cache-v7m.S + * + * Based on linux/arch/arm/mm/cache-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2005 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7M processor support. 
+ */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> +#include <asm/v7m.h> + +#include "proc-macros.S" + +/* Generic V7M read/write macros for memory mapped cache operations */ +.macro v7m_cache_read, rt, reg + movw \rt, #:lower16:BASEADDR_V7M_SCB + \reg + movt \rt, #:upper16:BASEADDR_V7M_SCB + \reg + ldr \rt, [\rt] +.endm + +.macro v7m_cacheop, rt, tmp, op, c = al + movw\c \tmp, #:lower16:BASEADDR_V7M_SCB + \op + movt\c \tmp, #:upper16:BASEADDR_V7M_SCB + \op + str\c \rt, [\tmp] +.endm + + +.macro read_ccsidr, rt + v7m_cache_read \rt, V7M_SCB_CCSIDR +.endm + +.macro read_clidr, rt + v7m_cache_read \rt, V7M_SCB_CLIDR +.endm + +.macro write_csselr, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR +.endm + +/* + * dcisw: Invalidate data cache by set/way + */ +.macro dcisw, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCISW +.endm + +/* + * dccisw: Clean and invalidate data cache by set/way + */ +.macro dccisw, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW +.endm + +/* + * dccimvac: Clean and invalidate data cache line by MVA to PoC. + */ +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dccimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c +.endm +.endr + +/* + * dcimvac: Invalidate data cache line by MVA to PoC + */ +.macro dcimvac, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC +.endm + +/* + * dccmvau: Clean data cache line by MVA to PoU + */ +.macro dccmvau, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU +.endm + +/* + * dccmvac: Clean data cache line by MVA to PoC + */ +.macro dccmvac, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC +.endm + +/* + * icimvau: Invalidate instruction caches by MVA to PoU + */ +.macro icimvau, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU +.endm + +/* + * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP. 
+ * rt data ignored by ICIALLU(IS), so can be used for the address + */ +.macro invalidate_icache, rt + v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU + mov \rt, #0 +.endm + +/* + * Invalidate the BTB, inner shareable if SMP. + * rt data ignored by BPIALL, so it can be used for the address + */ +.macro invalidate_bp, rt + v7m_cacheop \rt, \rt, V7M_SCB_BPIALL + mov \rt, #0 +.endm + +ENTRY(v7m_invalidate_l1) + mov r0, #0 + + write_csselr r0, r1 + read_ccsidr r0 + + movw r1, #0x7fff + and r2, r1, r0, lsr #13 + + movw r1, #0x3ff + + and r3, r1, r0, lsr #3 @ NumWays - 1 + add r2, r2, #1 @ NumSets + + and r0, r0, #0x7 + add r0, r0, #4 @ SetShift + + clz r1, r3 @ WayShift + add r4, r3, #1 @ NumWays +1: sub r2, r2, #1 @ NumSets-- + mov r3, r4 @ Temp = NumWays +2: subs r3, r3, #1 @ Temp-- + mov r5, r3, lsl r1 + mov r6, r2, lsl r0 + orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift) + dcisw r5, r6 + bgt 2b + cmp r2, #0 + bgt 1b + dsb st + isb + ret lr +ENDPROC(v7m_invalidate_l1) + +/* + * v7m_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7m_flush_icache_all) + invalidate_icache r0 + ret lr +ENDPROC(v7m_flush_icache_all) + +/* + * v7m_flush_dcache_all() + * + * Flush the whole D-cache. 
+ * + * Corrupted registers: r0-r7, r9-r11 + */ +ENTRY(v7m_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + read_clidr r0 + mov r3, r0, lsr #23 @ move LoC into position + ands r3, r3, #7 << 1 @ extract LoC*2 from clidr + beq finished @ if loc is 0, then no need to clean +start_flush_levels: + mov r10, #0 @ start clean at cache level 0 +flush_levels: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPT + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif + write_csselr r10, r1 @ set current cache level + isb @ isb to sync the new cssr&csidr + read_ccsidr r1 @ read the new csidr +#ifdef CONFIG_PREEMPT + restore_irqs_notrace r9 +#endif + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + movw r4, #0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + movw r7, #0x7fff + ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop1: + mov r9, r7 @ create working copy of max index +loop2: + lsl r6, r4, r5 + orr r11, r10, r6 @ factor way and cache number into r11 + lsl r6, r9, r2 + orr r11, r11, r6 @ factor index number into r11 + dccisw r11, r6 @ clean/invalidate by set/way + subs r9, r9, #1 @ decrement the index + bge loop2 + subs r4, r4, #1 @ decrement the way + bge loop1 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 + bgt flush_levels +finished: + mov r10, #0 @ switch back to cache level 0 + write_csselr r10, r3 @ select current cache level in cssr + dsb st + isb + ret lr +ENDPROC(v7m_flush_dcache_all) + +/* + * v7m_flush_kern_cache_all() + * + * Flush the entire cache system. 
+ * The data cache flush is now achieved using atomic clean / invalidates + * working outwards from L1 cache. This is done using Set/Way based cache + * maintenance instructions. + * The instruction cache can still be invalidated back to the point of + * unification in a single instruction. + * + */ +ENTRY(v7m_flush_kern_cache_all) + stmfd sp!, {r4-r7, r9-r11, lr} + bl v7m_flush_dcache_all + invalidate_icache r0 + ldmfd sp!, {r4-r7, r9-r11, lr} + ret lr +ENDPROC(v7m_flush_kern_cache_all) + +/* + * v7m_flush_user_cache_all() + * + * Flush all cache entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v7m_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v7m_flush_user_cache_range(start, end, flags) + * + * Flush a range of cache entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v7m_flush_user_cache_range) + ret lr +ENDPROC(v7m_flush_user_cache_all) +ENDPROC(v7m_flush_user_cache_range) + +/* + * v7m_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7m_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v7m_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. 
+ * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7m_coherent_user_range) + UNWIND(.fnstart ) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +1: +/* + * We use open coded version of dccmvau otherwise USER() would + * point at movw instruction. + */ + dccmvau r12, r3 + add r12, r12, r2 + cmp r12, r1 + blo 1b + dsb ishst + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +2: + icimvau r12, r3 + add r12, r12, r2 + cmp r12, r1 + blo 2b + invalidate_bp r0 + dsb ishst + isb + ret lr + UNWIND(.fnend ) +ENDPROC(v7m_coherent_kern_range) +ENDPROC(v7m_coherent_user_range) + +/* + * v7m_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v7m_flush_kern_dcache_area) + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccimvac r0, r3 @ clean & invalidate D line / unified line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_flush_kern_dcache_area) + +/* + * v7m_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. 
+ * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7m_dma_inv_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + tst r0, r3 + bic r0, r0, r3 + dccimvacne r0, r3 + subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac + tst r1, r3 + bic r1, r1, r3 + dccimvacne r1, r3 +1: + dcimvac r0, r3 + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_inv_range) + +/* + * v7m_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7m_dma_clean_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccmvac r0, r3 @ clean D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_clean_range) + +/* + * v7m_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v7m_dma_flush_range) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccimvac r0, r3 @ clean & invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_flush_range) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7m_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7m_dma_inv_range + b v7m_dma_clean_range +ENDPROC(v7m_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7m_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7m_dma_inv_range + ret lr +ENDPROC(v7m_dma_unmap_area) + + .globl v7m_flush_kern_cache_louis + .equ v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7m diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 
c6834c0cfd1c..a2302aba5df2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -436,7 +436,7 @@ static int __init atomic_pool_init(void) gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, (void *)PAGE_SHIFT); - pr_info("DMA: preallocated %zd KiB pool for atomic coherent allocations\n", + pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n", atomic_pool_size / 1024); return 0; } @@ -445,7 +445,7 @@ destroy_genpool: gen_pool_destroy(atomic_pool); atomic_pool = NULL; out: - pr_err("DMA: failed to allocate %zx KiB pool for atomic coherent allocation\n", + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", atomic_pool_size / 1024); return -ENOMEM; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 30fe03f95c85..4001dd15818d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -243,7 +243,7 @@ __setup("noalign", noalign_setup); #define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE -static struct mem_type mem_types[] = { +static struct mem_type mem_types[] __ro_after_init = { [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | L_PTE_SHARED, diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index c671f345266a..0d40c285bd86 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -7,6 +7,10 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#ifdef CONFIG_CPU_V7M +#include <asm/v7m.h> +#endif + /* * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) */ @@ -70,7 +74,13 @@ * on ARMv7. 
*/ .macro dcache_line_size, reg, tmp +#ifdef CONFIG_CPU_V7M + movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR + movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR + ldr \tmp, [\tmp] +#else mrc p15, 0, \tmp, c0, c0, 1 @ read ctr +#endif lsr \tmp, \tmp, #16 and \tmp, \tmp, #0xf @ cache line size encoding mov \reg, #4 @ bytes per word @@ -82,7 +92,13 @@ * on ARMv7. */ .macro icache_line_size, reg, tmp +#ifdef CONFIG_CPU_V7M + movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR + movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR + ldr \tmp, [\tmp] +#else mrc p15, 0, \tmp, c0, c0, 1 @ read ctr +#endif and \tmp, \tmp, #0xf @ cache line size encoding mov \reg, #4 @ bytes per word mov \reg, \reg, lsl \tmp @ actual cache line size diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 7229d8d0be1a..f6d333f09bfe 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -74,14 +74,42 @@ ENTRY(cpu_v7m_do_resume) ENDPROC(cpu_v7m_do_resume) #endif +ENTRY(cpu_cm7_dcache_clean_area) + dcache_line_size r2, r3 + movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC + movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC + +1: str r0, [r3] @ clean D entry + add r0, r0, r2 + subs r1, r1, r2 + bhi 1b + dsb + ret lr +ENDPROC(cpu_cm7_dcache_clean_area) + +ENTRY(cpu_cm7_proc_fin) + movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + ldr r0, [r2] + bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC) + str r0, [r2] + ret lr +ENDPROC(cpu_cm7_proc_fin) + .section ".text.init", #alloc, #execinstr +__v7m_cm7_setup: + mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) + b __v7m_setup_cont /* * __v7m_setup * * This should be able to cover all ARMv7-M cores. 
*/ __v7m_setup: + mov r8, 0 + +__v7m_setup_cont: @ Configure the vector table base address ldr r0, =BASEADDR_V7M_SCB ldr r12, =vector_table @@ -104,6 +132,7 @@ __v7m_setup: badr r1, 1f ldr r5, [r12, #11 * 4] @ read the SVC vector entry str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP cpsie i @@ -116,15 +145,32 @@ __v7m_setup: mov r1, #1 msr control, r1 @ Thread mode has unpriviledged access + @ Configure caches (if implemented) + teq r8, #0 + stmneia r12, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 + blne v7m_invalidate_l1 + teq r8, #0 @ re-evaluate condition + ldmneia r12, {r0-r6, lr} + @ Configure the System Control Register to ensure 8-byte stack alignment @ Note the STKALIGN bit is either RW or RAO. - ldr r12, [r0, V7M_SCB_CCR] @ system control register - orr r12, #V7M_SCB_CCR_STKALIGN - str r12, [r0, V7M_SCB_CCR] + ldr r0, [r0, V7M_SCB_CCR] @ system control register + orr r0, #V7M_SCB_CCR_STKALIGN + orr r0, r0, r8 + ret lr ENDPROC(__v7m_setup) +/* + * Cortex-M7 processor functions + */ + globl_equ cpu_cm7_proc_init, cpu_v7m_proc_init + globl_equ cpu_cm7_reset, cpu_v7m_reset + globl_equ cpu_cm7_do_idle, cpu_v7m_do_idle + globl_equ cpu_cm7_switch_mm, cpu_v7m_switch_mm + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" string cpu_arch_name, "armv7m" @@ -133,6 +179,50 @@ ENDPROC(__v7m_setup) .section ".proc.info.init", #alloc +.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions + .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ + .long 0 /* proc_info_list.__cpu_io_mmu_flags */ + initfn \initfunc, \name + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps + .long cpu_v7m_name + .long \proc_fns + .long 0 /* proc_info_list.tlb */ + 
.long 0 /* proc_info_list.user */ + .long \cache_fns +.endm + + /* + * Match ARM Cortex-M7 processor. + */ + .type __v7m_cm7_proc_info, #object +__v7m_cm7_proc_info: + .long 0x410fc270 /* ARM Cortex-M7 0xC27 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions + .size __v7m_cm7_proc_info, . - __v7m_cm7_proc_info + + /* + * Match ARM Cortex-M4 processor. + */ + .type __v7m_cm4_proc_info, #object +__v7m_cm4_proc_info: + .long 0x410fc240 /* ARM Cortex-M4 0xC24 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP + .size __v7m_cm4_proc_info, . - __v7m_cm4_proc_info + + /* + * Match ARM Cortex-M3 processor. + */ + .type __v7m_cm3_proc_info, #object +__v7m_cm3_proc_info: + .long 0x410fc230 /* ARM Cortex-M3 0xC23 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm3_proc_info, __v7m_setup + .size __v7m_cm3_proc_info, . - __v7m_cm3_proc_info + /* * Match any ARMv7-M processor core. */ @@ -140,16 +230,6 @@ ENDPROC(__v7m_setup) __v7m_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - .long 0 @ proc_info_list.__cpu_mm_mmu_flags - .long 0 @ proc_info_list.__cpu_io_mmu_flags - initfn __v7m_setup, __v7m_proc_info @ proc_info_list.__cpu_flush - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT - .long cpu_v7m_name - .long v7m_processor_functions @ proc_info_list.proc - .long 0 @ proc_info_list.tlb - .long 0 @ proc_info_list.user - .long nop_cache_fns @ proc_info_list.cache + __v7m_proc __v7m_proc_info, __v7m_setup .size __v7m_proc_info, . 
- __v7m_proc_info diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index a5b5c87e2114..a56fa2a1e9aa 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -19,6 +19,7 @@ #include <linux/amba/bus.h> #include <linux/sizes.h> #include <linux/limits.h> +#include <linux/clk/clk-conf.h> #include <asm/irq.h> @@ -237,6 +238,10 @@ static int amba_probe(struct device *dev) int ret; do { + ret = of_clk_set_defaults(dev->of_node, false); + if (ret < 0) + break; + ret = dev_pm_domain_attach(dev, true); if (ret == -EPROBE_DEFER) break; diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index b5befc211172..2bac9b6cfeea 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -159,7 +159,7 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz, * half speed or use delayed read latching (errata 13). */ if ((ns_to_cycles(sdram->tck, sd_khz) > 1) || - (CPU_REVISION < CPU_SA1110_B2 && sd_khz < 62000)) + (read_cpuid_revision() < ARM_CPU_REV_SA1110_B2 && sd_khz < 62000)) sd_khz /= 2; sd->mdcnfg = MDCNFG & 0x007f007f; diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index c7efddf6e038..4c09d93d9569 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -89,7 +89,7 @@ config BINFMT_SCRIPT config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU || M68K + depends on !MMU || ARM || M68K depends on !FRV || BROKEN help Support uClinux FLAT format binaries. diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7b6c446ee17f..a8ffc405f915 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -114,6 +114,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_NOTIFY_ONLINE, |