39 files changed, 1752 insertions, 394 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 61f6ccc19cfa..6be9ee148b78 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -23,7 +23,6 @@ ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
 LDFLAGS_MODULE	+= -T $(srctree)/arch/arm/kernel/module.lds
 endif
 
-OBJCOPYFLAGS	:=-O binary -R .comment -S
 GZFLAGS		:=-9
 #KBUILD_CFLAGS	+=-pipe
 
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index bdc1d5af03d2..50f8d1be7fcb 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -11,6 +11,8 @@
 # Copyright (C) 1995-2002 Russell King
 #
 
+OBJCOPYFLAGS	:=-O binary -R .comment -S
+
 ifneq ($(MACHINE),)
 include $(MACHINE)/Makefile.boot
 endif
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 2e076c492005..4ecd5120fce7 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -15,6 +15,7 @@
  * from machine specific code with proper arguments when required.
  */
 #include <linux/module.h>
+#include <linux/gpio/driver.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -107,6 +108,7 @@ struct sa1111 {
 	spinlock_t	lock;
 	void __iomem	*base;
 	struct sa1111_platform_data *pdata;
+	struct gpio_chip gc;
 #ifdef CONFIG_PM
 	void		*saved_state;
 #endif
@@ -231,132 +233,44 @@ static void sa1111_irq_handler(struct irq_desc *desc)
 #define SA1111_IRQMASK_LO(x)	(1 << (x - sachip->irq_base))
 #define SA1111_IRQMASK_HI(x)	(1 << (x - sachip->irq_base - 32))
 
-static void sa1111_ack_irq(struct irq_data *d)
-{
-}
-
-static void sa1111_mask_lowirq(struct irq_data *d)
+static u32 sa1111_irqmask(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie0;
 
-	ie0 = sa1111_readl(mapbase + SA1111_INTEN0);
-	ie0 &= ~SA1111_IRQMASK_LO(d->irq);
-	writel(ie0, mapbase + SA1111_INTEN0);
+	return BIT((d->irq - sachip->irq_base) & 31);
 }
 
-static void sa1111_unmask_lowirq(struct irq_data *d)
+static int sa1111_irqbank(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie0;
-
-	ie0 = sa1111_readl(mapbase + SA1111_INTEN0);
-	ie0 |= SA1111_IRQMASK_LO(d->irq);
-	sa1111_writel(ie0, mapbase + SA1111_INTEN0);
-}
-
-/*
- * Attempt to re-trigger the interrupt.  The SA1111 contains a register
- * (INTSET) which claims to do this.  However, in practice no amount of
- * manipulation of INTEN and INTSET guarantees that the interrupt will
- * be triggered.  In fact, its very difficult, if not impossible to get
- * INTSET to re-trigger the interrupt.
- */
-static int sa1111_retrigger_lowirq(struct irq_data *d)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long ip0;
-	int i;
-
-	ip0 = sa1111_readl(mapbase + SA1111_INTPOL0);
-	for (i = 0; i < 8; i++) {
-		sa1111_writel(ip0 ^ mask, mapbase + SA1111_INTPOL0);
-		sa1111_writel(ip0, mapbase + SA1111_INTPOL0);
-		if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
-			break;
-	}
 
-	if (i == 8)
-		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
-		       d->irq);
-	return i == 8 ? -1 : 0;
+	return ((d->irq - sachip->irq_base) / 32) * 4;
 }
 
-static int sa1111_type_lowirq(struct irq_data *d, unsigned int flags)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long ip0;
-
-	if (flags == IRQ_TYPE_PROBE)
-		return 0;
-
-	if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
-		return -EINVAL;
-
-	ip0 = sa1111_readl(mapbase + SA1111_INTPOL0);
-	if (flags & IRQ_TYPE_EDGE_RISING)
-		ip0 &= ~mask;
-	else
-		ip0 |= mask;
-	sa1111_writel(ip0, mapbase + SA1111_INTPOL0);
-	sa1111_writel(ip0, mapbase + SA1111_WAKEPOL0);
-
-	return 0;
-}
-
-static int sa1111_wake_lowirq(struct irq_data *d, unsigned int on)
+static void sa1111_ack_irq(struct irq_data *d)
 {
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long we0;
-
-	we0 = sa1111_readl(mapbase + SA1111_WAKEEN0);
-	if (on)
-		we0 |= mask;
-	else
-		we0 &= ~mask;
-	sa1111_writel(we0, mapbase + SA1111_WAKEEN0);
-
-	return 0;
 }
 
-static struct irq_chip sa1111_low_chip = {
-	.name		= "SA1111-l",
-	.irq_ack	= sa1111_ack_irq,
-	.irq_mask	= sa1111_mask_lowirq,
-	.irq_unmask	= sa1111_unmask_lowirq,
-	.irq_retrigger	= sa1111_retrigger_lowirq,
-	.irq_set_type	= sa1111_type_lowirq,
-	.irq_set_wake	= sa1111_wake_lowirq,
-};
-
-static void sa1111_mask_highirq(struct irq_data *d)
+static void sa1111_mask_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie1;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ie;
 
-	ie1 = sa1111_readl(mapbase + SA1111_INTEN1);
-	ie1 &= ~SA1111_IRQMASK_HI(d->irq);
-	sa1111_writel(ie1, mapbase + SA1111_INTEN1);
+	ie = sa1111_readl(mapbase + SA1111_INTEN0);
+	ie &= ~sa1111_irqmask(d);
+	sa1111_writel(ie, mapbase + SA1111_INTEN0);
 }
 
-static void sa1111_unmask_highirq(struct irq_data *d)
+static void sa1111_unmask_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie1;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ie;
 
-	ie1 = sa1111_readl(mapbase + SA1111_INTEN1);
-	ie1 |= SA1111_IRQMASK_HI(d->irq);
-	sa1111_writel(ie1, mapbase + SA1111_INTEN1);
+	ie = sa1111_readl(mapbase + SA1111_INTEN0);
+	ie |= sa1111_irqmask(d);
+	sa1111_writel(ie, mapbase + SA1111_INTEN0);
 }
 
 /*
@@ -366,19 +280,18 @@ static void sa1111_unmask_highirq(struct irq_data *d)
  * be triggered.  In fact, its very difficult, if not impossible to get
  * INTSET to re-trigger the interrupt.
  */
-static int sa1111_retrigger_highirq(struct irq_data *d)
+static int sa1111_retrigger_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long ip1;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ip, mask = sa1111_irqmask(d);
 	int i;
 
-	ip1 = sa1111_readl(mapbase + SA1111_INTPOL1);
+	ip = sa1111_readl(mapbase + SA1111_INTPOL0);
 	for (i = 0; i < 8; i++) {
-		sa1111_writel(ip1 ^ mask, mapbase + SA1111_INTPOL1);
-		sa1111_writel(ip1, mapbase + SA1111_INTPOL1);
-		if (sa1111_readl(mapbase + SA1111_INTSTATCLR1) & mask)
+		sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0);
+		sa1111_writel(ip, mapbase + SA1111_INTPOL0);
+		if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
 			break;
 	}
 
@@ -388,12 +301,11 @@ static int sa1111_retrigger_highirq(struct irq_data *d)
 	return i == 8 ? -1 : 0;
 }
 
-static int sa1111_type_highirq(struct irq_data *d, unsigned int flags)
+static int sa1111_type_irq(struct irq_data *d, unsigned int flags)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long ip1;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ip, mask = sa1111_irqmask(d);
 
 	if (flags == IRQ_TYPE_PROBE)
 		return 0;
@@ -401,42 +313,41 @@ static int sa1111_type_highirq(struct irq_data *d, unsigned int flags)
 	if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
 		return -EINVAL;
 
-	ip1 = sa1111_readl(mapbase + SA1111_INTPOL1);
+	ip = sa1111_readl(mapbase + SA1111_INTPOL0);
 	if (flags & IRQ_TYPE_EDGE_RISING)
-		ip1 &= ~mask;
+		ip &= ~mask;
 	else
-		ip1 |= mask;
-	sa1111_writel(ip1, mapbase + SA1111_INTPOL1);
-	sa1111_writel(ip1, mapbase + SA1111_WAKEPOL1);
+		ip |= mask;
+	sa1111_writel(ip, mapbase + SA1111_INTPOL0);
+	sa1111_writel(ip, mapbase + SA1111_WAKEPOL0);
 
 	return 0;
 }
 
-static int sa1111_wake_highirq(struct irq_data *d, unsigned int on)
+static int sa1111_wake_irq(struct irq_data *d, unsigned int on)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long we1;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 we, mask = sa1111_irqmask(d);
 
-	we1 = sa1111_readl(mapbase + SA1111_WAKEEN1);
+	we = sa1111_readl(mapbase + SA1111_WAKEEN0);
 	if (on)
-		we1 |= mask;
+		we |= mask;
 	else
-		we1 &= ~mask;
-	sa1111_writel(we1, mapbase + SA1111_WAKEEN1);
+		we &= ~mask;
+	sa1111_writel(we, mapbase + SA1111_WAKEEN0);
 
 	return 0;
 }
 
-static struct irq_chip sa1111_high_chip = {
-	.name		= "SA1111-h",
+static struct irq_chip sa1111_irq_chip = {
+	.name		= "SA1111",
 	.irq_ack	= sa1111_ack_irq,
-	.irq_mask	= sa1111_mask_highirq,
-	.irq_unmask	= sa1111_unmask_highirq,
-	.irq_retrigger	= sa1111_retrigger_highirq,
-	.irq_set_type	= sa1111_type_highirq,
-	.irq_set_wake	= sa1111_wake_highirq,
+	.irq_mask	= sa1111_mask_irq,
+	.irq_unmask	= sa1111_unmask_irq,
+	.irq_retrigger	= sa1111_retrigger_irq,
+	.irq_set_type	= sa1111_type_irq,
+	.irq_set_wake	= sa1111_wake_irq,
 };
 
 static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
@@ -482,16 +393,14 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
 
 	for (i = IRQ_GPAIN0; i <= SSPROR; i++) {
 		irq = sachip->irq_base + i;
-		irq_set_chip_and_handler(irq, &sa1111_low_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
 		irq_set_chip_data(irq, sachip);
 		irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
 	}
 
 	for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) {
 		irq = sachip->irq_base + i;
-		irq_set_chip_and_handler(irq, &sa1111_high_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
 		irq_set_chip_data(irq, sachip);
 		irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
 	}
@@ -509,6 +418,181 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
 	return 0;
 }
 
+static void sa1111_remove_irq(struct sa1111 *sachip)
+{
+	void __iomem *irqbase = sachip->base + SA1111_INTC;
+
+	/* disable all IRQs */
+	sa1111_writel(0, irqbase + SA1111_INTEN0);
+	sa1111_writel(0, irqbase + SA1111_INTEN1);
+	sa1111_writel(0, irqbase + SA1111_WAKEEN0);
+	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+
+	if (sachip->irq != NO_IRQ) {
+		irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
+		irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+
+		release_mem_region(sachip->phys + SA1111_INTC, 512);
+	}
+}
+
+enum {
+	SA1111_GPIO_PXDDR = (SA1111_GPIO_PADDR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXDRR = (SA1111_GPIO_PADRR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXDWR = (SA1111_GPIO_PADWR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXSDR = (SA1111_GPIO_PASDR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXSSR = (SA1111_GPIO_PASSR - SA1111_GPIO_PADDR),
+};
+
+static struct sa1111 *gc_to_sa1111(struct gpio_chip *gc)
+{
+	return container_of(gc, struct sa1111, gc);
+}
+
+static void __iomem *sa1111_gpio_map_reg(struct sa1111 *sachip, unsigned offset)
+{
+	void __iomem *reg = sachip->base + SA1111_GPIO;
+
+	if (offset < 4)
+		return reg + SA1111_GPIO_PADDR;
+	if (offset < 10)
+		return reg + SA1111_GPIO_PBDDR;
+	if (offset < 18)
+		return reg + SA1111_GPIO_PCDDR;
+	return NULL;
+}
+
+static u32 sa1111_gpio_map_bit(unsigned offset)
+{
+	if (offset < 4)
+		return BIT(offset);
+	if (offset < 10)
+		return BIT(offset - 4);
+	if (offset < 18)
+		return BIT(offset - 10);
+	return 0;
+}
+
+static void sa1111_gpio_modify(void __iomem *reg, u32 mask, u32 set)
+{
+	u32 val;
+
+	val = readl_relaxed(reg);
+	val &= ~mask;
+	val |= mask & set;
+	writel_relaxed(val, reg);
+}
+
+static int sa1111_gpio_get_direction(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	return !!(readl_relaxed(reg + SA1111_GPIO_PXDDR) & mask);
+}
+
+static int sa1111_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, mask);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, mask);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+
+	return 0;
+}
+
+static int sa1111_gpio_direction_output(struct gpio_chip *gc, unsigned offset,
+	int value)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, 0);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+
+	return 0;
+}
+
+static int sa1111_gpio_get(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	return !!(readl_relaxed(reg + SA1111_GPIO_PXDRR) & mask);
+}
+
+static void sa1111_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+}
+
+static void sa1111_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
+	unsigned long *bits)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sachip->base + SA1111_GPIO;
+	u32 msk, val;
+
+	msk = *mask;
+	val = *bits;
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PADWR, msk & 15, val);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PASSR, msk & 15, val);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PBDWR, (msk >> 4) & 255, val >> 4);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PBSSR, (msk >> 4) & 255, val >> 4);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PCDWR, (msk >> 12) & 255, val >> 12);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PCSSR, (msk >> 12) & 255, val >> 12);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+}
+
+static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+
+	return sachip->irq_base + offset;
+}
+
+static int sa1111_setup_gpios(struct sa1111 *sachip)
+{
+	sachip->gc.label = "sa1111";
+	sachip->gc.parent = sachip->dev;
+	sachip->gc.owner = THIS_MODULE;
+	sachip->gc.get_direction = sa1111_gpio_get_direction;
+	sachip->gc.direction_input = sa1111_gpio_direction_input;
+	sachip->gc.direction_output = sa1111_gpio_direction_output;
+	sachip->gc.get = sa1111_gpio_get;
+	sachip->gc.set = sa1111_gpio_set;
+	sachip->gc.set_multiple = sa1111_gpio_set_multiple;
+	sachip->gc.to_irq = sa1111_gpio_to_irq;
+	sachip->gc.base = -1;
+	sachip->gc.ngpio = 18;
+
+	return devm_gpiochip_add_data(sachip->dev, &sachip->gc, sachip);
+}
+
 /*
  * Bring the SA1111 out of reset.  This requires a set procedure:
  *  1. nRESET asserted (by hardware)
@@ -607,7 +691,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
 
 static void sa1111_dev_release(struct device *_dev)
 {
-	struct sa1111_dev *dev = SA1111_DEV(_dev);
+	struct sa1111_dev *dev = to_sa1111_device(_dev);
 
 	kfree(dev);
 }
@@ -696,19 +780,17 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 	if (!pd)
 		return -EINVAL;
 
-	sachip = kzalloc(sizeof(struct sa1111), GFP_KERNEL);
+	sachip = devm_kzalloc(me, sizeof(struct sa1111), GFP_KERNEL);
 	if (!sachip)
 		return -ENOMEM;
 
-	sachip->clk = clk_get(me, "SA1111_CLK");
-	if (IS_ERR(sachip->clk)) {
-		ret = PTR_ERR(sachip->clk);
-		goto err_free;
-	}
+	sachip->clk = devm_clk_get(me, "SA1111_CLK");
+	if (IS_ERR(sachip->clk))
+		return PTR_ERR(sachip->clk);
 
 	ret = clk_prepare(sachip->clk);
 	if (ret)
-		goto err_clkput;
+		return ret;
 
 	spin_lock_init(&sachip->lock);
 
@@ -757,6 +839,11 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 			goto err_clk;
 	}
 
+	/* Setup the GPIOs - should really be done after the IRQ setup */
+	ret = sa1111_setup_gpios(sachip);
+	if (ret)
+		goto err_irq;
+
 #ifdef CONFIG_ARCH_SA1100
 	{
 	unsigned int val;
@@ -799,22 +886,22 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 
 	return 0;
 
+ err_irq:
+	sa1111_remove_irq(sachip);
  err_clk:
 	clk_disable(sachip->clk);
  err_unmap:
 	iounmap(sachip->base);
  err_clk_unprep:
 	clk_unprepare(sachip->clk);
- err_clkput:
-	clk_put(sachip->clk);
- err_free:
-	kfree(sachip);
 	return ret;
 }
 
 static int sa1111_remove_one(struct device *dev, void *data)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
+	if (dev->bus != &sa1111_bus_type)
+		return 0;
 	device_del(&sadev->dev);
 	release_resource(&sadev->res);
 	put_device(&sadev->dev);
@@ -823,29 +910,14 @@ static int sa1111_remove_one(struct device *dev, void *data)
 
 static void __sa1111_remove(struct sa1111 *sachip)
 {
-	void __iomem *irqbase = sachip->base + SA1111_INTC;
-
 	device_for_each_child(sachip->dev, NULL, sa1111_remove_one);
 
-	/* disable all IRQs */
-	sa1111_writel(0, irqbase + SA1111_INTEN0);
-	sa1111_writel(0, irqbase + SA1111_INTEN1);
-	sa1111_writel(0, irqbase + SA1111_WAKEEN0);
-	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+	sa1111_remove_irq(sachip);
 
 	clk_disable(sachip->clk);
 	clk_unprepare(sachip->clk);
 
-	if (sachip->irq != NO_IRQ) {
-		irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
-		irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
-
-		release_mem_region(sachip->phys + SA1111_INTC, 512);
-	}
-
 	iounmap(sachip->base);
-	clk_put(sachip->clk);
-	kfree(sachip);
 }
 
 struct sa1111_save_data {
@@ -1285,6 +1357,14 @@ void sa1111_disable_device(struct sa1111_dev *sadev)
 }
 EXPORT_SYMBOL(sa1111_disable_device);
 
+int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num)
+{
+	if (num >= ARRAY_SIZE(sadev->irq))
+		return -EINVAL;
+	return sadev->irq[num];
+}
+EXPORT_SYMBOL_GPL(sa1111_get_irq);
+
 /*
  *	SA1111 "Register Access Bus."
  *
@@ -1293,7 +1373,7 @@ EXPORT_SYMBOL(sa1111_disable_device);
  */
 static int sa1111_match(struct device *_dev, struct device_driver *_drv)
 {
-	struct sa1111_dev *dev = SA1111_DEV(_dev);
+	struct sa1111_dev *dev = to_sa1111_device(_dev);
 	struct sa1111_driver *drv = SA1111_DRV(_drv);
 
 	return !!(dev->devid & drv->devid);
@@ -1301,7 +1381,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv)
 
 static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1312,7 +1392,7 @@ static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
 
 static int sa1111_bus_resume(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1326,12 +1406,12 @@ static void sa1111_bus_shutdown(struct device *dev)
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 
 	if (drv && drv->shutdown)
-		drv->shutdown(SA1111_DEV(dev));
+		drv->shutdown(to_sa1111_device(dev));
 }
 
 static int sa1111_bus_probe(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = -ENODEV;
 
@@ -1342,7 +1422,7 @@ static int sa1111_bus_probe(struct device *dev)
 
 static int sa1111_bus_remove(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1407,7 +1487,7 @@ static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
 static int sa1111_notifier_call(struct notifier_block *n, unsigned long action,
 	void *data)
 {
-	struct sa1111_dev *dev = SA1111_DEV(data);
+	struct sa1111_dev *dev = to_sa1111_device(data);
 
 	switch (action) {
 	case BUS_NOTIFY_ADD_DEVICE:
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 4eaea2173bf8..68b06f9c65de 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -159,7 +159,11 @@
 	.endm
 
 	.macro	save_and_disable_irqs_notrace, oldcpsr
+#ifdef CONFIG_CPU_V7M
+	mrs	\oldcpsr, primask
+#else
 	mrs	\oldcpsr, cpsr
+#endif
 	disable_irq_notrace
 	.endm
 
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 9156fc303afd..bdd283bc5842 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -501,21 +501,4 @@ static inline void set_kernel_text_ro(void) { }
 void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
 			     void *kaddr, unsigned long len);
 
-/**
- * secure_flush_area - ensure coherency across the secure boundary
- * @addr: virtual address
- * @size: size of region
- *
- * Ensure that the specified area of memory is coherent across the secure
- * boundary from the non-secure side.  This is used when calling secure
- * firmware where the secure firmware does not ensure coherency.
- */
-static inline void secure_flush_area(const void *addr, size_t size)
-{
-	phys_addr_t phys = __pa(addr);
-
-	__cpuc_flush_dcache_area((void *)addr, size);
-	outer_flush_range(phys, phys + size);
-}
-
 #endif
diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h
index 7ea78144ae22..01509ae0bbec 100644
--- a/arch/arm/include/asm/cachetype.h
+++ b/arch/arm/include/asm/cachetype.h
@@ -56,4 +56,43 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask)
 	       (~__CACHEID_NEVER & __CACHEID_ARCH_MIN & mask & cacheid);
 }
 
+#define CSSELR_ICACHE	1
+#define CSSELR_DCACHE	0
+
+#define CSSELR_L1	(0 << 1)
+#define CSSELR_L2	(1 << 1)
+#define CSSELR_L3	(2 << 1)
+#define CSSELR_L4	(3 << 1)
+#define CSSELR_L5	(4 << 1)
+#define CSSELR_L6	(5 << 1)
+#define CSSELR_L7	(6 << 1)
+
+#ifndef CONFIG_CPU_V7M
+static inline void set_csselr(unsigned int cache_selector)
+{
+	asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector));
+}
+
+static inline unsigned int read_ccsidr(void)
+{
+	unsigned int val;
+
+	asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+#else /* CONFIG_CPU_V7M */
+#include <linux/io.h>
+#include "asm/v7m.h"
+
+static inline void set_csselr(unsigned int cache_selector)
+{
+	writel(cache_selector, BASEADDR_V7M_SCB + V7M_SCB_CTR);
+}
+
+static inline unsigned int read_ccsidr(void)
+{
+	return readl(BASEADDR_V7M_SCB + V7M_SCB_CCSIDR);
+}
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 1ee94c716a7f..754f86f667d4 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -60,6 +60,7 @@
 	((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK)
 
 #define ARM_CPU_IMP_ARM			0x41
+#define ARM_CPU_IMP_DEC			0x44
 #define ARM_CPU_IMP_INTEL		0x69
 
 /* ARM implemented processors */
@@ -76,6 +77,17 @@
 #define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 #define ARM_CPU_PART_MASK		0xff00fff0
 
+/* DEC implemented cores */
+#define ARM_CPU_PART_SA1100		0x4400a110
+
+/* Intel implemented cores */
+#define ARM_CPU_PART_SA1110		0x6900b110
+#define ARM_CPU_REV_SA1110_A0		0
+#define ARM_CPU_REV_SA1110_B0		4
+#define ARM_CPU_REV_SA1110_B1		5
+#define ARM_CPU_REV_SA1110_B2		6
+#define ARM_CPU_REV_SA1110_B4		8
+
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
 #define ARM_CPU_XSCALE_ARCH_V2		0x4000
@@ -152,6 +164,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void)
 	return read_cpuid(CPUID_ID);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
+{
+	return read_cpuid(CPUID_CACHETYPE);
+}
+
 #elif defined(CONFIG_CPU_V7M)
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@ -159,6 +176,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void)
 	return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
+{
+	return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR);
+}
+
 #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@ -173,6 +195,11 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_revision(void)
+{
+	return read_cpuid_id() & 0x0000000f;
+}
+
 /*
  * The CPU part number is meaningless without referring to the CPU
  * implementer: implementers are free to define their own part numbers
@@ -193,11 +220,6 @@ static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
-{
-	return read_cpuid(CPUID_CACHETYPE);
-}
-
 static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
 {
 	return read_cpuid(CPUID_TCM);
@@ -208,6 +230,10 @@ static inline unsigned int __attribute_const__ read_cpuid_mpidr(void)
 	return read_cpuid(CPUID_MPIDR);
 }
 
+/* StrongARM-11x0 CPUs */
+#define cpu_is_sa1100() (read_cpuid_part() == ARM_CPU_PART_SA1100)
+#define cpu_is_sa1110() (read_cpuid_part() == ARM_CPU_PART_SA1110)
+
 /*
  * Intel's XScale3 core supports some v6 features (supersections, L2)
  * but advertises itself as v5 as it does not support the v6 ISA.  For
diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index e847d23351ed..acf1d14b89a6 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -8,8 +8,9 @@
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent) ((void)persistent,get_unaligned(rp))
-#define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
+#define	flat_get_addr_from_rp(rp, relval, flags, persistent) \
+	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
+#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		0
 
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index cab07f69382d..01c3d92624e5 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -118,11 +118,7 @@
 #endif
 
 #if defined(CONFIG_CPU_V7M)
-# ifdef _CACHE
 #  define MULTI_CACHE 1
-# else
-#  define _CACHE nop
-# endif
 #endif
 
 #if !defined(_CACHE) && !defined(MULTI_CACHE)
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 3a5ec1c25659..736292b42fca 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -87,6 +87,15 @@
 #define L310_CACHE_ID_RTL_R3P2		0x08
 #define L310_CACHE_ID_RTL_R3P3		0x09
 
+#define L2X0_EVENT_CNT_CTRL_ENABLE	BIT(0)
+
+#define L2X0_EVENT_CNT_CFG_SRC_SHIFT	2
+#define L2X0_EVENT_CNT_CFG_SRC_MASK	0xf
+#define L2X0_EVENT_CNT_CFG_SRC_DISABLED	0
+#define L2X0_EVENT_CNT_CFG_INT_DISABLED	0
+#define L2X0_EVENT_CNT_CFG_INT_INCR	1
+#define L2X0_EVENT_CNT_CFG_INT_OVERFLOW	2
+
 /* L2C auxiliary control register - bits common to L2C-210/220/310 */
 #define L2C_AUX_CTRL_WAY_SIZE_SHIFT		17
 #define L2C_AUX_CTRL_WAY_SIZE_MASK		(7 << 17)
@@ -157,6 +166,16 @@ static inline int l2x0_of_init(u32 aux_val, u32 aux_mask)
 }
 #endif
 
+#ifdef CONFIG_CACHE_L2X0_PMU
+void l2x0_pmu_register(void __iomem *base, u32 part);
+void l2x0_pmu_suspend(void);
+void l2x0_pmu_resume(void);
+#else
+static inline void l2x0_pmu_register(void __iomem *base, u32 part) {}
+static inline void l2x0_pmu_suspend(void) {}
+static inline void l2x0_pmu_resume(void) {}
+#endif
+
 struct l2x0_regs {
 	unsigned long phy_base;
 	unsigned long aux_ctrl;
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 7c2bbc7f0be1..8979fa3bbf2d 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -420,7 +420,7 @@ struct sa1111_dev {
 	u64		dma_mask;
 };
 
-#define SA1111_DEV(_d)	container_of((_d), struct sa1111_dev, dev)
+#define to_sa1111_device(x)	container_of(x, struct sa1111_dev, dev)
 
 #define sa1111_get_drvdata(d)	dev_get_drvdata(&(d)->dev)
 #define sa1111_set_drvdata(d,p)	dev_set_drvdata(&(d)->dev, p)
@@ -446,6 +446,8 @@ struct sa1111_driver {
 int sa1111_enable_device(struct sa1111_dev *);
 void sa1111_disable_device(struct sa1111_dev *);
 
+int sa1111_get_irq(struct sa1111_dev *, unsigned num);
+
 unsigned int sa1111_pll_clock(struct sa1111_dev *);
 
 #define SA1111_AUDIO_ACLINK	0
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index 8e427c7b4425..afcaf8bf971b 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -114,7 +114,6 @@ struct notifier_block;
 struct perf_event;
 struct pmu;
 
-extern struct pmu perf_ops_bp;
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
 				  int *gen_len, int *gen_type);
 extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 31c07a2cc100..76cbd9c674df 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -159,13 +159,8 @@
  * PFNs are used to describe any physical page; this means
  * PFN 0 == physical address 0.
  */
-#if defined(__virt_to_phys)
-#define PHYS_OFFSET	PLAT_PHYS_OFFSET
-#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
-
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 
-#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT)
+#if defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 
 /*
  * Constants used to force the right instruction encodings and shifts
@@ -182,10 +177,6 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
-#define virt_to_pfn(kaddr) \
-	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
-	 PHYS_PFN_OFFSET)
-
 #define __pv_stub(from,to,instr,type)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -257,12 +248,12 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 	return x - PHYS_OFFSET + PAGE_OFFSET;
 }
 
+#endif
+
 #define virt_to_pfn(kaddr) \
 	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
 	 PHYS_PFN_OFFSET)
 
-#endif
-
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index e358b7966c06..464748b9fd7d 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -23,10 +23,8 @@ struct mod_arch_specific {
 	struct unwind_table *unwind[ARM_SEC_MAX];
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
-	struct elf32_shdr   *core_plt;
-	struct elf32_shdr   *init_plt;
-	int		    core_plt_count;
-	int		    init_plt_count;
+	struct elf32_shdr   *plt;
+	int		    plt_count;
 #endif
 };
 
diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h
index 615781c61627..1fd775c1bc5d 100644
--- a/arch/arm/include/asm/v7m.h
+++ b/arch/arm/include/asm/v7m.h
@@ -24,6 +24,9 @@
 
 #define V7M_SCB_CCR			0x14
 #define V7M_SCB_CCR_STKALIGN			(1 << 9)
+#define V7M_SCB_CCR_DC				(1 << 16)
+#define V7M_SCB_CCR_IC				(1 << 17)
+#define V7M_SCB_CCR_BP				(1 << 18)
 
 #define V7M_SCB_SHPR2			0x1c
 #define V7M_SCB_SHPR3			0x20
@@ -47,6 +50,25 @@
 #define EXC_RET_STACK_MASK			0x00000004
 #define EXC_RET_THREADMODE_PROCESSSTACK		0xfffffffd
 
+/* Cache related definitions */
+
+#define	V7M_SCB_CLIDR		0x78	/* Cache Level ID register */
+#define	V7M_SCB_CTR		0x7c	/* Cache Type register */
+#define	V7M_SCB_CCSIDR		0x80	/* Cache size ID register */
+#define	V7M_SCB_CSSELR		0x84	/* Cache size selection register */
+
+/* Cache opeartions */
+#define	V7M_SCB_ICIALLU		0x250	/* I-cache invalidate all to PoU */
+#define	V7M_SCB_ICIMVAU		0x258	/* I-cache invalidate by MVA to PoU */
+#define	V7M_SCB_DCIMVAC		0x25c	/* D-cache invalidate by MVA to PoC */
+#define	V7M_SCB_DCISW		0x260	/* D-cache invalidate by set-way */
+#define	V7M_SCB_DCCMVAU		0x264	/* D-cache clean by MVA to PoU */
+#define	V7M_SCB_DCCMVAC		0x268	/* D-cache clean by MVA to PoC */
+#define	V7M_SCB_DCCSW		0x26c	/* D-cache clean by set-way */
+#define	V7M_SCB_DCCIMVAC	0x270	/* D-cache clean and invalidate by MVA to PoC */
+#define	V7M_SCB_DCCISW		0x274	/* D-cache clean and invalidate by set-way */
+#define	V7M_SCB_BPIALL		0x278	/* D-cache clean and invalidate by set-way */
+
 #ifndef __ASSEMBLY__
 
 enum reboot_mode;
diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 7dccc964d75f..a3308ad1a024 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -19,7 +19,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[];
 static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
 	__used __section(__cpuidle_method_of_table_end);
 
-static struct cpuidle_ops cpuidle_ops[NR_CPUS];
+static struct cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init;
 
 /**
  * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index fb1a69eb49c1..6b4eb27b8758 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -158,7 +158,21 @@ __after_proc_init:
 	bic	r0, r0, #CR_V
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
-#endif /* CONFIG_CPU_CP15 */
+#elif defined (CONFIG_CPU_V7M)
+	/* For V7M systems we want to modify the CCR similarly to the SCTLR */
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_DC
+#endif
+#ifdef CONFIG_CPU_BPREDICT_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_BP
+#endif
+#ifdef CONFIG_CPU_ICACHE_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_IC
+#endif
+	movw	r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	movt	r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	str	r0, [r3]
+#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
 	ret	lr
 ENDPROC(__after_proc_init)
 	.ltorg
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 0c7efc3446c0..3a5cba90c971 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -9,6 +9,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sort.h>
 
 #include <asm/cache.h>
 #include <asm/opcodes.h>
@@ -30,154 +31,198 @@ struct plt_entries {
 	u32	lit[PLT_ENT_COUNT];
 };
 
-static bool in_init(const struct module *mod, u32 addr)
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
-	return addr - (u32)mod->init_layout.base < mod->init_layout.size;
+	struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
+	int idx = 0;
+
+	/*
+	 * Look for an existing entry pointing to 'val'. Given that the
+	 * relocations are sorted, this will be the last entry we allocated.
+	 * (if one exists).
+	 */
+	if (mod->arch.plt_count > 0) {
+		plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
+		idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
+
+		if (plt->lit[idx] == val)
+			return (u32)&plt->ldr[idx];
+
+		idx = (idx + 1) % PLT_ENT_COUNT;
+		if (!idx)
+			plt++;
+	}
+
+	mod->arch.plt_count++;
+	BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
+
+	if (!idx)
+		/* Populate a new set of entries */
+		*plt = (struct plt_entries){
+			{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+			{ val, }
+		};
+	else
+		plt->lit[idx] = val;
+
+	return (u32)&plt->ldr[idx];
 }
 
-u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rel(const void *a, const void *b)
 {
-	struct plt_entries *plt, *plt_end;
-	int c, *count;
-
-	if (in_init(mod, loc)) {
-		plt = (void *)mod->arch.init_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.init_plt->sh_size;
-		count = &mod->arch.init_plt_count;
-	} else {
-		plt = (void *)mod->arch.core_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.core_plt->sh_size;
-		count = &mod->arch.core_plt_count;
-	}
+	const Elf32_Rel *x = a, *y = b;
+	int i;
 
-	/* Look for an existing entry pointing to 'val' */
-	for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
-		int i;
-
-		if (!c) {
-			/* Populate a new set of entries */
-			*plt = (struct plt_entries){
-				{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
-				{ val, }
-			};
-			++*count;
-			return (u32)plt->ldr;
-		}
-		for (i = 0; i < PLT_ENT_COUNT; i++) {
-			if (!plt->lit[i]) {
-				plt->lit[i] = val;
-				++*count;
-			}
-			if (plt->lit[i] == val)
-				return (u32)&plt->ldr[i];
-		}
+	/* sort by type and symbol index */
+	i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
+	return i;
+}
+
+static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel)
+{
+	u32 *tval = (u32 *)(base + rel->r_offset);
+
+	/*
+	 * Do a bitwise compare on the raw addend rather than fully decoding
+	 * the offset and doing an arithmetic comparison.
+	 * Note that a zero-addend jump/call relocation is encoded taking the
+	 * PC bias into account, i.e., -8 for ARM and -4 for Thumb2.
+	 */
+	switch (ELF32_R_TYPE(rel->r_info)) {
+		u16 upper, lower;
+
+	case R_ARM_THM_CALL:
+	case R_ARM_THM_JUMP24:
+		upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]);
+		lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]);
+
+		return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe;
+
+	case R_ARM_CALL:
+	case R_ARM_PC24:
+	case R_ARM_JUMP24:
+		return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe;
 	}
 	BUG();
 }
 
-static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
-			   u32 mask)
+static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
 {
-	u32 *loc1, *loc2;
-	int i;
+	const Elf32_Rel *prev;
 
-	for (i = 0; i < num; i++) {
-		if (rel[i].r_info != rel[num].r_info)
-			continue;
+	/*
+	 * Entries are sorted by type and symbol index. That means that,
+	 * if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	if (!num)
+		return false;
 
-		/*
-		 * Identical relocation types against identical symbols can
-		 * still result in different PLT entries if the addend in the
-		 * place is different. So resolve the target of the relocation
-		 * to compare the values.
-		 */
-		loc1 = (u32 *)(base + rel[i].r_offset);
-		loc2 = (u32 *)(base + rel[num].r_offset);
-		if (((*loc1 ^ *loc2) & mask) == 0)
-			return 1;
-	}
-	return 0;
+	prev = rel + num - 1;
+	return cmp_rel(rel + num, prev) == 0 &&
+	       is_zero_addend_relocation(base, prev);
 }
 
 /* Count how many PLT entries we may need */
-static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
+			       const Elf32_Rel *rel, int num)
 {
 	unsigned int ret = 0;
+	const Elf32_Sym *s;
 	int i;
 
-	/*
-	 * Sure, this is order(n^2), but it's usually short, and not
-	 * time critical
-	 */
-	for (i = 0; i < num; i++)
+	for (i = 0; i < num; i++) {
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_ARM_CALL:
 		case R_ARM_PC24:
 		case R_ARM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_arm(0x00ffffff)))
-				ret++;
-			break;
-#ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_thumb32(0x07ff2fff)))
+			/*
+			 * We only have to consider branch targets that resolve
+			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within range
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF32_R_SYM(rel[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero.
+			 */
+			if (!is_zero_addend_relocation(base, rel + i) ||
+			    !duplicate_rel(base, rel, i))
 				ret++;
-#endif
 		}
+	}
 	return ret;
 }
 
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long core_plts = 0, init_plts = 0;
+	unsigned long plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+	Elf32_Sym *syms = NULL;
 
 	/*
 	 * To store the PLTs, we expand the .text section for core module code
-	 * and the .init.text section for initialization code.
+	 * and for initialization code.
 	 */
-	for (s = sechdrs; s < sechdrs_end; ++s)
-		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
-			mod->arch.core_plt = s;
-		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
-			mod->arch.init_plt = s;
-
-	if (!mod->arch.core_plt || !mod->arch.init_plt) {
-		pr_err("%s: sections missing\n", mod->name);
+	for (s = sechdrs; s < sechdrs_end; ++s) {
+		if (strcmp(".plt", secstrings + s->sh_name) == 0)
+			mod->arch.plt = s;
+		else if (s->sh_type == SHT_SYMTAB)
+			syms = (Elf32_Sym *)s->sh_addr;
+	}
+
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
 		return -ENOEXEC;
 	}
 
 	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
-		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+		Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
 		int numrels = s->sh_size / sizeof(Elf32_Rel);
 		Elf32_Shdr *dstsec = sechdrs + s->sh_info;
 
 		if (s->sh_type != SHT_REL)
 			continue;
 
-		if (strstr(secstrings + s->sh_name, ".init"))
-			init_plts += count_plts(dstsec->sh_addr, rels, numrels);
-		else
-			core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* sort by type and symbol index */
+		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
+
+		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
 	}
 
-	mod->arch.core_plt->sh_type = SHT_NOBITS;
-	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.core_plt_count = 0;
-
-	mod->arch.init_plt->sh_type = SHT_NOBITS;
-	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.init_plt_count = 0;
-	pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
-		 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
+					  sizeof(struct plt_entries));
+	mod->arch.plt_count = 0;
+
+	pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
 	return 0;
 }
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
index 3682fa107918..05881e2b414c 100644
--- a/arch/arm/kernel/module.lds
+++ b/arch/arm/kernel/module.lds
@@ -1,4 +1,3 @@
 SECTIONS {
-        .core.plt : { BYTE(0) }
-        .init.plt : { BYTE(0) }
+	.plt : { BYTE(0) }
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index df7f2a75e769..34e3f3c45634 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -114,19 +114,19 @@ EXPORT_SYMBOL(elf_hwcap2);
 
 
 #ifdef MULTI_CPU
-struct processor processor __read_mostly;
+struct processor processor __ro_after_init;
 #endif
 #ifdef MULTI_TLB
-struct cpu_tlb_fns cpu_tlb __read_mostly;
+struct cpu_tlb_fns cpu_tlb __ro_after_init;
 #endif
 #ifdef MULTI_USER
-struct cpu_user_fns cpu_user __read_mostly;
+struct cpu_user_fns cpu_user __ro_after_init;
 #endif
 #ifdef MULTI_CACHE
-struct cpu_cache_fns cpu_cache __read_mostly;
+struct cpu_cache_fns cpu_cache __ro_after_init;
 #endif
 #ifdef CONFIG_OUTER_CACHE
-struct outer_cache_fns outer_cache __read_mostly;
+struct outer_cache_fns outer_cache __ro_after_init;
 EXPORT_SYMBOL(outer_cache);
 #endif
 
@@ -290,12 +290,9 @@ static int cpu_has_aliasing_icache(unsigned int arch)
 	/* arch specifies the register format */
 	switch (arch) {
 	case CPU_ARCH_ARMv7:
-		asm("mcr	p15, 2, %0, c0, c0, 0 @ set CSSELR"
-		    : /* No output operands */
-		    : "r" (1));
+		set_csselr(CSSELR_ICACHE | CSSELR_L1);
 		isb();
-		asm("mrc	p15, 1, %0, c0, c0, 0 @ read CCSIDR"
-		    : "=r" (id_reg));
+		id_reg = read_ccsidr();
 		line_size = 4 << ((id_reg & 0x7) + 2);
 		num_sets = ((id_reg >> 13) & 0x7fff) + 1;
 		aliasing_icache = (line_size * num_sets) > PAGE_SIZE;
@@ -315,11 +312,12 @@ static void __init cacheid_init(void)
 {
 	unsigned int arch = cpu_architecture();
 
-	if (arch == CPU_ARCH_ARMv7M) {
-		cacheid = 0;
-	} else if (arch >= CPU_ARCH_ARMv6) {
+	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
-		if ((cachetype & (7 << 29)) == 4 << 29) {
+
+		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+			cacheid = 0;
+		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
 			arch = CPU_ARCH_ARMv7;
 			cacheid = CACHEID_VIPT_NONALIASING;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 861521606c6d..937c8920d741 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -82,7 +82,7 @@ enum ipi_msg_type {
 
 static DECLARE_COMPLETION(cpu_running);
 
-static struct smp_operations smp_ops;
+static struct smp_operations smp_ops __ro_after_init;
 
 void __init smp_set_ops(const struct smp_operations *ops)
 {
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index a0affd14086a..53cf86cf2d1a 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/cache.h>
 #include <linux/elf.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -39,7 +40,7 @@
 static struct page **vdso_text_pagelist;
 
 /* Total number of pages needed for the data and text portions of the VDSO. */
-unsigned int vdso_total_pages __read_mostly;
+unsigned int vdso_total_pages __ro_after_init;
 
 /*
  * The VDSO data page.
@@ -47,13 +48,13 @@ unsigned int vdso_total_pages __read_mostly;
 static union vdso_data_store vdso_data_store __page_aligned_data;
 static struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static struct page *vdso_data_page;
-static struct vm_special_mapping vdso_data_mapping = {
+static struct page *vdso_data_page __ro_after_init;
+static const struct vm_special_mapping vdso_data_mapping = {
 	.name = "[vvar]",
 	.pages = &vdso_data_page,
 };
 
-static struct vm_special_mapping vdso_text_mapping = {
+static struct vm_special_mapping vdso_text_mapping __ro_after_init = {
 	.name = "[vdso]",
 };
 
@@ -67,7 +68,7 @@ struct elfinfo {
 /* Cached result of boot-time check for whether the arch timer exists,
  * and if so, whether the virtual counter is useable.
  */
-static bool cntvct_ok __read_mostly;
+static bool cntvct_ok __ro_after_init;
 
 static bool __init cntvct_functional(void)
 {
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 8044591dca72..2cef11884857 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -29,7 +29,7 @@
 /*
  * Default to the loop-based delay implementation.
  */
-struct arm_delay_ops arm_delay_ops = {
+struct arm_delay_ops arm_delay_ops __ro_after_init = {
 	.delay		= __loop_delay,
 	.const_udelay	= __loop_const_udelay,
 	.udelay		= __loop_udelay,
diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h
index 02f6d7a706b1..20d5ad781fe2 100644
--- a/arch/arm/mach-footbridge/include/mach/hardware.h
+++ b/arch/arm/mach-footbridge/include/mach/hardware.h
@@ -59,7 +59,7 @@
 #define XBUS_SWITCH_J17_11	((*XBUS_SWITCH) & (1 << 5))
 #define XBUS_SWITCH_J17_9	((*XBUS_SWITCH) & (1 << 6))
 
-#define UNCACHEABLE_ADDR	(ARMCSR_BASE + 0x108)
+#define UNCACHEABLE_ADDR	(ARMCSR_BASE + 0x108)	/* CSR_ROMBASEMASK */
 
 
 /* PIC irq control */
diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h
index 257166b21f3d..aa79fa47373a 100644
--- a/arch/arm/mach-rpc/include/mach/hardware.h
+++ b/arch/arm/mach-rpc/include/mach/hardware.h
@@ -40,7 +40,7 @@
 #define SCREEN_END		0xdfc00000
 #define SCREEN_BASE		0xdf800000
 
-#define UNCACHEABLE_ADDR	0xdf010000
+#define UNCACHEABLE_ADDR	(FLUSH_BASE + 0x10000)
 
 /*
  * IO Addresses
diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h
index cbedd75a9d65..d944fd7e464f 100644
--- a/arch/arm/mach-sa1100/include/mach/hardware.h
+++ b/arch/arm/mach-sa1100/include/mach/hardware.h
@@ -13,7 +13,7 @@
 #define __ASM_ARCH_HARDWARE_H
 
 
-#define UNCACHEABLE_ADDR	0xfa050000
+#define UNCACHEABLE_ADDR	0xfa050000	/* ICIP */
 
 
 /*
@@ -36,28 +36,10 @@
 #define io_v2p( x )             \
    ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))<<VIO_SHIFT)) + PIO_START )
 
-#define CPU_SA1110_A0	(0)
-#define CPU_SA1110_B0	(4)
-#define CPU_SA1110_B1	(5)
-#define CPU_SA1110_B2	(6)
-#define CPU_SA1110_B4	(8)
-
-#define CPU_SA1100_ID	(0x4401a110)
-#define CPU_SA1100_MASK	(0xfffffff0)
-#define CPU_SA1110_ID	(0x6901b110)
-#define CPU_SA1110_MASK	(0xfffffff0)
-
 #define __MREG(x)	IOMEM(io_p2v(x))
 
 #ifndef __ASSEMBLY__
 
-#include <asm/cputype.h>
-
-#define CPU_REVISION	(read_cpuid_id() & 15)
-
-#define cpu_is_sa1100()	((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID)
-#define cpu_is_sa1110()	((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID)
-
 # define __REG(x)	(*((volatile unsigned long __iomem *)io_p2v(x)))
 # define __PREG(x)	(io_v2p((unsigned long)&(x)))
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d15a7fe51618..c1799dd1d0d9 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -403,6 +403,7 @@ config CPU_V7M
 	bool
 	select CPU_32v7M
 	select CPU_ABRT_NOMMU
+	select CPU_CACHE_V7M
 	select CPU_CACHE_NOP
 	select CPU_PABRT_LEGACY
 	select CPU_THUMBONLY
@@ -518,6 +519,9 @@ config CPU_CACHE_VIPT
 config CPU_CACHE_FA
 	bool
 
+config CPU_CACHE_V7M
+	bool
+
 if MMU
 # The copy-page model
 config CPU_COPY_V4WT
@@ -750,14 +754,14 @@ config CPU_HIGH_VECTOR
 
 config CPU_ICACHE_DISABLE
 	bool "Disable I-Cache (I-bit)"
-	depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)
+	depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M
 	help
 	  Say Y here to disable the processor instruction cache. Unless
 	  you have a reason not to or are unsure, say N.
 
 config CPU_DCACHE_DISABLE
 	bool "Disable D-Cache (C-bit)"
-	depends on CPU_CP15 && !SMP
+	depends on (CPU_CP15 && !SMP) || CPU_V7M
 	help
 	  Say Y here to disable the processor data cache. Unless
 	  you have a reason not to or are unsure, say N.
@@ -792,7 +796,7 @@ config CPU_CACHE_ROUND_ROBIN
 
 config CPU_BPREDICT_DISABLE
 	bool "Disable branch prediction"
-	depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526
+	depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
@@ -916,6 +920,13 @@ config CACHE_L2X0
 	help
 	  This option enables the L2x0 PrimeCell.
 
+config CACHE_L2X0_PMU
+	bool "L2x0 performance monitor support" if CACHE_L2X0
+	depends on PERF_EVENTS
+	help
+	  This option enables support for the performance monitoring features
+	  of the L220 and PL310 outer cache controllers.
+
 if CACHE_L2X0
 
 config PL310_ERRATA_588369
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7f76d96ce546..e8698241ece9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -43,9 +43,11 @@ obj-$(CONFIG_CPU_CACHE_V6)	+= cache-v6.o
 obj-$(CONFIG_CPU_CACHE_V7)	+= cache-v7.o
 obj-$(CONFIG_CPU_CACHE_FA)	+= cache-fa.o
 obj-$(CONFIG_CPU_CACHE_NOP)	+= cache-nop.o
+obj-$(CONFIG_CPU_CACHE_V7M)	+= cache-v7m.o
 
 AFLAGS_cache-v6.o	:=-Wa,-march=armv6
 AFLAGS_cache-v7.o	:=-Wa,-march=armv7-a
+AFLAGS_cache-v7m.o	:=-Wa,-march=armv7-m
 
 obj-$(CONFIG_CPU_COPY_V4WT)	+= copypage-v4wt.o
 obj-$(CONFIG_CPU_COPY_V4WB)	+= copypage-v4wb.o
@@ -101,6 +103,7 @@ AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a
 obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o
 obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
 obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o
+obj-$(CONFIG_CACHE_L2X0_PMU)	+= cache-l2x0-pmu.o
 obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
 obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o
 obj-$(CONFIG_CACHE_UNIPHIER)	+= cache-uniphier.o
diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c
new file mode 100644
index 000000000000..976d3057272e
--- /dev/null
+++ b/arch/arm/mm/cache-l2x0-pmu.c
@@ -0,0 +1,584 @@
+/*
+ * L220/L310 cache controller support
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/errno.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/hardware/cache-l2x0.h>
+
+#define PMU_NR_COUNTERS 2
+
+static void __iomem *l2x0_base;
+static struct pmu *l2x0_pmu;
+static cpumask_t pmu_cpu;
+
+static const char *l2x0_name;
+
+static ktime_t l2x0_pmu_poll_period;
+static struct hrtimer l2x0_pmu_hrtimer;
+
+/*
+ * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
+ * Registers controlling these are laid out in pairs, in descending order, i.e.
+ * the register for Counter1 comes first, followed by the register for
+ * Counter0.
+ * We ensure that idx 0 -> Counter0, and idx1 -> Counter1.
+ */
+static struct perf_event *events[PMU_NR_COUNTERS];
+
+/* Find an unused counter */
+static int l2x0_pmu_find_idx(void)
+{
+	int i;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (!events[i])
+			return i;
+	}
+
+	return -1;
+}
+
+/* How many counters are allocated? */
+static int l2x0_pmu_num_active_counters(void)
+{
+	int i, cnt = 0;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static void l2x0_pmu_counter_config_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
+}
+
+static u32 l2x0_pmu_counter_read(int idx)
+{
+	return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void l2x0_pmu_counter_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void __l2x0_pmu_enable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val |= L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void __l2x0_pmu_disable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void l2x0_pmu_enable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_enable();
+}
+
+static void l2x0_pmu_disable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_disable();
+}
+
+static void warn_if_saturated(u32 count)
+{
+	if (count != 0xffffffff)
+		return;
+
+	pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
+}
+
+static void l2x0_pmu_event_read(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		 prev_count = local64_read(&hw->prev_count);
+		 new_count = l2x0_pmu_counter_read(hw->idx);
+	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
+
+	mask = GENMASK_ULL(31, 0);
+	local64_add((new_count - prev_count) & mask, &event->count);
+
+	warn_if_saturated(new_count);
+}
+
+static void l2x0_pmu_event_configure(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	/*
+	 * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
+	 * will *always* lose some number of events when a counter saturates,
+	 * and have no way of detecting how many were lost.
+	 *
+	 * To minimize the impact of this, we try to maximize the period by
+	 * always starting counters at zero. To ensure that group ratios are
+	 * representative, we poll periodically to avoid counters saturating.
+	 * See l2x0_pmu_poll().
+	 */
+	local64_set(&hw->prev_count, 0);
+	l2x0_pmu_counter_write(hw->idx, 0);
+}
+
+static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
+{
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		struct perf_event *event = events[i];
+
+		if (!event)
+			continue;
+
+		l2x0_pmu_event_read(event);
+		l2x0_pmu_event_configure(event);
+	}
+
+	__l2x0_pmu_enable();
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
+	return HRTIMER_RESTART;
+}
+
+
+static void __l2x0_pmu_event_enable(int idx, u32 event)
+{
+	u32 val;
+
+	val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+		l2x0_pmu_event_configure(event);
+	}
+
+	hw->state = 0;
+
+	__l2x0_pmu_event_enable(hw->idx, hw->config_base);
+}
+
+static void __l2x0_pmu_event_disable(int idx)
+{
+	u32 val;
+
+	val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	__l2x0_pmu_event_disable(hw->idx);
+
+	hw->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_UPDATE) {
+		l2x0_pmu_event_read(event);
+		hw->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int l2x0_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+	int idx = l2x0_pmu_find_idx();
+
+	if (idx == -1)
+		return -EAGAIN;
+
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
+			      HRTIMER_MODE_REL_PINNED);
+
+	events[idx] = event;
+	hw->idx = idx;
+
+	l2x0_pmu_event_configure(event);
+
+	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		l2x0_pmu_event_start(event, 0);
+
+	return 0;
+}
+
+static void l2x0_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	l2x0_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	events[hw->idx] = NULL;
+	hw->idx = -1;
+
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_cancel(&l2x0_pmu_hrtimer);
+}
+
+static bool l2x0_pmu_group_is_valid(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int num_hw = 0;
+
+	if (leader->pmu == pmu)
+		num_hw++;
+	else if (!is_software_event(leader))
+		return false;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (sibling->pmu == pmu)
+			num_hw++;
+		else if (!is_software_event(sibling))
+			return false;
+	}
+
+	return num_hw <= PMU_NR_COUNTERS;
+}
+
+static int l2x0_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (event->attr.type != l2x0_pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) ||
+	    event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
+		return -EINVAL;
+
+	hw->config_base = event->attr.config;
+
+	if (!l2x0_pmu_group_is_valid(event))
+		return -EINVAL;
+
+	event->cpu = cpumask_first(&pmu_cpu);
+
+	return 0;
+}
+
+struct l2x0_event_attribute {
+	struct device_attribute attr;
+	unsigned int config;
+	bool pl310_only;
+};
+
+#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)				\
+	(&((struct l2x0_event_attribute[]) {{					\
+		.attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),	\
+		.config = _config,						\
+		.pl310_only = _pl310_only,					\
+	}})[0].attr.attr)
+
+#define L220_PLUS_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, false)
+
+#define PL310_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, true)
+
+static ssize_t l2x0_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr);
+	return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
+}
+
+static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
+					      struct attribute *attr,
+					      int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr.attr);
+
+	if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute *l2x0_pmu_event_attrs[] = {
+	L220_PLUS_EVENT_ATTR(co,	0x1),
+	L220_PLUS_EVENT_ATTR(drhit,	0x2),
+	L220_PLUS_EVENT_ATTR(drreq,	0x3),
+	L220_PLUS_EVENT_ATTR(dwhit,	0x4),
+	L220_PLUS_EVENT_ATTR(dwreq,	0x5),
+	L220_PLUS_EVENT_ATTR(dwtreq,	0x6),
+	L220_PLUS_EVENT_ATTR(irhit,	0x7),
+	L220_PLUS_EVENT_ATTR(irreq,	0x8),
+	L220_PLUS_EVENT_ATTR(wa,	0x9),
+	PL310_EVENT_ATTR(ipfalloc,	0xa),
+	PL310_EVENT_ATTR(epfhit,	0xb),
+	PL310_EVENT_ATTR(epfalloc,	0xc),
+	PL310_EVENT_ATTR(srrcvd,	0xd),
+	PL310_EVENT_ATTR(srconf,	0xe),
+	PL310_EVENT_ATTR(epfrcvd,	0xf),
+	NULL
+};
+
+static struct attribute_group l2x0_pmu_event_attrs_group = {
+	.name = "events",
+	.attrs = l2x0_pmu_event_attrs,
+	.is_visible = l2x0_pmu_event_attr_is_visible,
+};
+
+static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
+}
+
+static struct device_attribute l2x0_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);
+
+static struct attribute *l2x0_pmu_cpumask_attrs[] = {
+	&l2x0_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group l2x0_pmu_cpumask_attr_group = {
+	.attrs = l2x0_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *l2x0_pmu_attr_groups[] = {
+	&l2x0_pmu_event_attrs_group,
+	&l2x0_pmu_cpumask_attr_group,
+	NULL,
+};
+
+static void l2x0_pmu_reset(void)
+{
+	int i;
+
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++)
+		__l2x0_pmu_event_disable(i);
+}
+
+static int l2x0_pmu_offline_cpu(unsigned int cpu)
+{
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(l2x0_pmu, cpu, target);
+	cpumask_set_cpu(target, &pmu_cpu);
+
+	return 0;
+}
+
+void l2x0_pmu_suspend(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_disable(l2x0_pmu);
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
+	}
+
+}
+
+void l2x0_pmu_resume(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_reset();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
+	}
+
+	l2x0_pmu_enable(l2x0_pmu);
+}
+
+void __init l2x0_pmu_register(void __iomem *base, u32 part)
+{
+	/*
+	 * Determine whether we support the PMU, and choose the name for sysfs.
+	 * This is also used by l2x0_pmu_event_attr_is_visible to determine
+	 * which events to display, as the PL310 PMU supports a superset of
+	 * L220 events.
+	 *
+	 * The L210 PMU has a different programmer's interface, and is not
+	 * supported by this driver.
+	 *
+	 * We must defer registering the PMU until the perf subsystem is up and
+	 * running, so just stash the name and base, and leave that to another
+	 * initcall.
+	 */
+	switch (part & L2X0_CACHE_ID_PART_MASK) {
+	case L2X0_CACHE_ID_PART_L220:
+		l2x0_name = "l2c_220";
+		break;
+	case L2X0_CACHE_ID_PART_L310:
+		l2x0_name = "l2c_310";
+		break;
+	default:
+		return;
+	}
+
+	l2x0_base = base;
+}
+
+static __init int l2x0_pmu_init(void)
+{
+	int ret;
+
+	if (!l2x0_base)
+		return 0;
+
+	l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
+	if (!l2x0_pmu) {
+		pr_warn("Unable to allocate L2x0 PMU\n");
+		return -ENOMEM;
+	}
+
+	*l2x0_pmu = (struct pmu) {
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = l2x0_pmu_enable,
+		.pmu_disable = l2x0_pmu_disable,
+		.read = l2x0_pmu_event_read,
+		.start = l2x0_pmu_event_start,
+		.stop = l2x0_pmu_event_stop,
+		.add = l2x0_pmu_event_add,
+		.del = l2x0_pmu_event_del,
+		.event_init = l2x0_pmu_event_init,
+		.attr_groups = l2x0_pmu_attr_groups,
+	};
+
+	l2x0_pmu_reset();
+
+	/*
+	 * We always use a hrtimer rather than an interrupt.
+	 * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
+	 *
+	 * Polling once a second allows the counters to fill up to 1/128th on a
+	 * quad-core test chip with cores clocked at 400MHz. Hopefully this
+	 * leaves sufficient headroom to avoid overflow on production silicon
+	 * at higher frequencies.
+	 */
+	l2x0_pmu_poll_period = ms_to_ktime(1000);
+	hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	l2x0_pmu_hrtimer.function = l2x0_pmu_poll;
+
+	cpumask_set_cpu(0, &pmu_cpu);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+					"AP_PERF_ARM_L2X0_ONLINE", NULL,
+					l2x0_pmu_offline_cpu);
+	if (ret)
+		goto out_pmu;
+
+	ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
+	if (ret)
+		goto out_cpuhp;
+
+	return 0;
+
+out_cpuhp:
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
+out_pmu:
+	kfree(l2x0_pmu);
+	l2x0_pmu = NULL;
+	return ret;
+}
+device_initcall(l2x0_pmu_init);
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index cc12905ae6f8..d1870c777c6e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -142,6 +142,8 @@ static void l2c_disable(void)
 {
 	void __iomem *base = l2x0_base;
 
+	l2x0_pmu_suspend();
+
 	outer_cache.flush_all();
 	l2c_write_sec(0, base, L2X0_CTRL);
 	dsb(st);
@@ -159,6 +161,8 @@ static void l2c_resume(void)
 	/* Do not touch the controller if already enabled. */
 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
 		l2c_enable(base, l2x0_data->num_lock);
+
+	l2x0_pmu_resume();
 }
 
 /*
@@ -709,9 +713,8 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
 		u32 val = l2x0_saved_regs.prefetch_ctrl;
-		/* I don't think bit23 is required here... but iMX6 does so */
-		if (val & (BIT(30) | BIT(23))) {
-			val &= ~(BIT(30) | BIT(23));
+		if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) {
+			val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
 			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}
@@ -892,6 +895,8 @@ static int __init __l2c_init(const struct l2c_init_data *data,
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
 
+	l2x0_pmu_register(l2x0_base, cache_id);
+
 	return 0;
 }
 
diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
new file mode 100644
index 000000000000..816a7e44e6f1
--- /dev/null
+++ b/arch/arm/mm/cache-v7m.S
@@ -0,0 +1,453 @@
+/*
+ *  linux/arch/arm/mm/cache-v7m.S
+ *
+ *  Based on linux/arch/arm/mm/cache-v7.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2005 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7M processor support.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
+#include <asm/v7m.h>
+
+#include "proc-macros.S"
+
+/* Generic V7M read/write macros for memory mapped cache operations */
+.macro v7m_cache_read, rt, reg
+	movw	\rt, #:lower16:BASEADDR_V7M_SCB + \reg
+	movt	\rt, #:upper16:BASEADDR_V7M_SCB + \reg
+	ldr     \rt, [\rt]
+.endm
+
+.macro v7m_cacheop, rt, tmp, op, c = al
+	movw\c	\tmp, #:lower16:BASEADDR_V7M_SCB + \op
+	movt\c	\tmp, #:upper16:BASEADDR_V7M_SCB + \op
+	str\c	\rt, [\tmp]
+.endm
+
+
+.macro	read_ccsidr, rt
+	v7m_cache_read \rt, V7M_SCB_CCSIDR
+.endm
+
+.macro read_clidr, rt
+	v7m_cache_read \rt, V7M_SCB_CLIDR
+.endm
+
+.macro	write_csselr, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR
+.endm
+
+/*
+ * dcisw: Invalidate data cache by set/way
+ */
+.macro dcisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCISW
+.endm
+
+/*
+ * dccisw: Clean and invalidate data cache by set/way
+ */
+.macro dccisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW
+.endm
+
+/*
+ * dccimvac: Clean and invalidate data cache line by MVA to PoC.
+ */
+.irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dccimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c
+.endm
+.endr
+
+/*
+ * dcimvac: Invalidate data cache line by MVA to PoC
+ */
+.macro dcimvac, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC
+.endm
+
+/*
+ * dccmvau: Clean data cache line by MVA to PoU
+ */
+.macro dccmvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU
+.endm
+
+/*
+ * dccmvac: Clean data cache line by MVA to PoC
+ */
+.macro dccmvac,  rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC
+.endm
+
+/*
+ * icimvau: Invalidate instruction caches by MVA to PoU
+ */
+.macro icimvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU
+.endm
+
+/*
+ * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP.
+ * rt data ignored by ICIALLU(IS), so can be used for the address
+ */
+.macro invalidate_icache, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU
+	mov \rt, #0
+.endm
+
+/*
+ * Invalidate the BTB, inner shareable if SMP.
+ * rt data ignored by BPIALL, so it can be used for the address
+ */
+.macro invalidate_bp, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_BPIALL
+	mov \rt, #0
+.endm
+
+ENTRY(v7m_invalidate_l1)
+	mov	r0, #0
+
+	write_csselr r0, r1
+	read_ccsidr r0
+
+	movw	r1, #0x7fff
+	and	r2, r1, r0, lsr #13
+
+	movw	r1, #0x3ff
+
+	and	r3, r1, r0, lsr #3      @ NumWays - 1
+	add	r2, r2, #1              @ NumSets
+
+	and	r0, r0, #0x7
+	add	r0, r0, #4      @ SetShift
+
+	clz	r1, r3          @ WayShift
+	add	r4, r3, #1      @ NumWays
+1:	sub	r2, r2, #1      @ NumSets--
+	mov	r3, r4          @ Temp = NumWays
+2:	subs	r3, r3, #1      @ Temp--
+	mov	r5, r3, lsl r1
+	mov	r6, r2, lsl r0
+	orr	r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	dcisw	r5, r6
+	bgt	2b
+	cmp	r2, #0
+	bgt	1b
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_invalidate_l1)
+
+/*
+ *	v7m_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ */
+ENTRY(v7m_flush_icache_all)
+	invalidate_icache r0
+	ret	lr
+ENDPROC(v7m_flush_icache_all)
+
+/*
+ *	v7m_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11
+ */
+ENTRY(v7m_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	read_clidr r0
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask of the bits for current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
+#endif
+	write_csselr r10, r1			@ set current cache level
+	isb					@ isb to sych the new cssr&csidr
+	read_ccsidr r1				@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+	lsl	r6, r4, r5
+	orr	r11, r10, r6			@ factor way and cache number into r11
+	lsl	r6, r9, r2
+	orr	r11, r11, r6			@ factor index number into r11
+	dccisw	r11, r6				@ clean/invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ swith back to cache level 0
+	write_csselr r10, r3			@ select current cache level in cssr
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_flush_dcache_all)
+
+/*
+ *	v7m_flush_cache_all()
+ *
+ *	Flush the entire cache system.
+ *  The data cache flush is now achieved using atomic clean / invalidates
+ *  working outwards from L1 cache. This is done using Set/Way based cache
+ *  maintenance instructions.
+ *  The instruction cache can still be invalidated back to the point of
+ *  unification in a single instruction.
+ *
+ */
+ENTRY(v7m_flush_kern_cache_all)
+	stmfd	sp!, {r4-r7, r9-r11, lr}
+	bl	v7m_flush_dcache_all
+	invalidate_icache r0
+	ldmfd	sp!, {r4-r7, r9-r11, lr}
+	ret	lr
+ENDPROC(v7m_flush_kern_cache_all)
+
+/*
+ *	v7m_flush_cache_all()
+ *
+ *	Flush all TLB entries in a particular address space
+ *
+ *	- mm    - mm_struct describing address space
+ */
+ENTRY(v7m_flush_user_cache_all)
+	/*FALLTHROUGH*/
+
+/*
+ *	v7m_flush_cache_range(start, end, flags)
+ *
+ *	Flush a range of TLB entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ *
+ *	It is assumed that:
+ *	- we have a VIPT cache.
+ */
+ENTRY(v7m_flush_user_cache_range)
+	ret	lr
+ENDPROC(v7m_flush_user_cache_all)
+ENDPROC(v7m_flush_user_cache_range)
+
+/*
+ *	v7m_coherent_kern_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	v7m_coherent_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_user_range)
+ UNWIND(.fnstart		)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+1:
+/*
+ * We use open coded version of dccmvau otherwise USER() would
+ * point at movw instruction.
+ */
+	dccmvau	r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
+	dsb	ishst
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+2:
+	icimvau r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+	invalidate_bp r0
+	dsb	ishst
+	isb
+	ret	lr
+ UNWIND(.fnend		)
+ENDPROC(v7m_coherent_kern_range)
+ENDPROC(v7m_coherent_user_range)
+
+/*
+ *	v7m_flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v7m_flush_kern_dcache_area)
+	dcache_line_size r2, r3
+	add	r1, r0, r1
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3		@ clean & invalidate D line / unified line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_flush_kern_dcache_area)
+
+/*
+ *	v7m_dma_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will
+ *	be performing a DMA operation in this region and we want to
+ *	purge old data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_inv_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	tst	r0, r3
+	bic	r0, r0, r3
+	dccimvacne r0, r3
+	subne	r3, r2, #1	@ restore r3, corrupted by v7m's dccimvac
+	tst	r1, r3
+	bic	r1, r1, r3
+	dccimvacne r1, r3
+1:
+	dcimvac r0, r3
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_inv_range)
+
+/*
+ *	v7m_dma_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_clean_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccmvac r0, r3			@ clean D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_clean_range)
+
+/*
+ *	v7m_dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(v7m_dma_flush_range)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3			 @ clean & invalidate D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_flush_range)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_FROM_DEVICE
+	beq	v7m_dma_inv_range
+	b	v7m_dma_clean_range
+ENDPROC(v7m_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_unmap_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v7m_dma_inv_range
+	ret	lr
+ENDPROC(v7m_dma_unmap_area)
+
+	.globl	v7m_flush_kern_cache_louis
+	.equ	v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v7m
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c6834c0cfd1c..a2302aba5df2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -436,7 +436,7 @@ static int __init atomic_pool_init(void)
 		gen_pool_set_algo(atomic_pool,
 				gen_pool_first_fit_order_align,
 				(void *)PAGE_SHIFT);
-		pr_info("DMA: preallocated %zd KiB pool for atomic coherent allocations\n",
+		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
 		       atomic_pool_size / 1024);
 		return 0;
 	}
@@ -445,7 +445,7 @@ destroy_genpool:
 	gen_pool_destroy(atomic_pool);
 	atomic_pool = NULL;
 out:
-	pr_err("DMA: failed to allocate %zx KiB pool for atomic coherent allocation\n",
+	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
 	       atomic_pool_size / 1024);
 	return -ENOMEM;
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 30fe03f95c85..4001dd15818d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -243,7 +243,7 @@ __setup("noalign", noalign_setup);
 #define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
-static struct mem_type mem_types[] = {
+static struct mem_type mem_types[] __ro_after_init = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 				  L_PTE_SHARED,
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index c671f345266a..0d40c285bd86 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -7,6 +7,10 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_CPU_V7M
+#include <asm/v7m.h>
+#endif
+
 /*
  * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
  */
@@ -70,7 +74,13 @@
  * on ARMv7.
  */
 	.macro	dcache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
 	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
 	lsr	\tmp, \tmp, #16
 	and	\tmp, \tmp, #0xf		@ cache line size encoding
 	mov	\reg, #4			@ bytes per word
@@ -82,7 +92,13 @@
  * on ARMv7.
  */
 	.macro	icache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
 	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
 	and	\tmp, \tmp, #0xf		@ cache line size encoding
 	mov	\reg, #4			@ bytes per word
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 7229d8d0be1a..f6d333f09bfe 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -74,14 +74,42 @@ ENTRY(cpu_v7m_do_resume)
 ENDPROC(cpu_v7m_do_resume)
 #endif
 
+ENTRY(cpu_cm7_dcache_clean_area)
+	dcache_line_size r2, r3
+	movw	r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+	movt	r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+
+1:	str	r0, [r3]		@ clean D entry
+	add	r0, r0, r2
+	subs	r1, r1, r2
+	bhi	1b
+	dsb
+	ret	lr
+ENDPROC(cpu_cm7_dcache_clean_area)
+
+ENTRY(cpu_cm7_proc_fin)
+	movw	r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	movt	r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	ldr	r0, [r2]
+	bic	r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC)
+	str	r0, [r2]
+	ret	lr
+ENDPROC(cpu_cm7_proc_fin)
+
 	.section ".text.init", #alloc, #execinstr
 
+__v7m_cm7_setup:
+	mov	r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP)
+	b	__v7m_setup_cont
 /*
  *	__v7m_setup
  *
  *	This should be able to cover all ARMv7-M cores.
  */
 __v7m_setup:
+	mov	r8, 0
+
+__v7m_setup_cont:
 	@ Configure the vector table base address
 	ldr	r0, =BASEADDR_V7M_SCB
 	ldr	r12, =vector_table
@@ -104,6 +132,7 @@ __v7m_setup:
 	badr	r1, 1f
 	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
 	str	r1, [r12, #11 * 4]	@ write the temporary SVC vector entry
+	dsb
 	mov	r6, lr			@ save LR
 	ldr	sp, =init_thread_union + THREAD_START_SP
 	cpsie	i
@@ -116,15 +145,32 @@ __v7m_setup:
 	mov	r1, #1
 	msr	control, r1		@ Thread mode has unpriviledged access
 
+	@ Configure caches (if implemented)
+	teq     r8, #0
+	stmneia	r12, {r0-r6, lr}	@ v7m_invalidate_l1 touches r0-r6
+	blne	v7m_invalidate_l1
+	teq     r8, #0			@ re-evalutae condition
+	ldmneia	r12, {r0-r6, lr}
+
 	@ Configure the System Control Register to ensure 8-byte stack alignment
 	@ Note the STKALIGN bit is either RW or RAO.
-	ldr	r12, [r0, V7M_SCB_CCR]	@ system control register
-	orr	r12, #V7M_SCB_CCR_STKALIGN
-	str	r12, [r0, V7M_SCB_CCR]
+	ldr	r0, [r0, V7M_SCB_CCR]   @ system control register
+	orr	r0, #V7M_SCB_CCR_STKALIGN
+	orr	r0, r0, r8
+
 	ret	lr
 ENDPROC(__v7m_setup)
 
+/*
+ * Cortex-M7 processor functions
+ */
+	globl_equ	cpu_cm7_proc_init,	cpu_v7m_proc_init
+	globl_equ	cpu_cm7_reset,		cpu_v7m_reset
+	globl_equ	cpu_cm7_do_idle,	cpu_v7m_do_idle
+	globl_equ	cpu_cm7_switch_mm,	cpu_v7m_switch_mm
+
 	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+	define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
 	.section ".rodata"
 	string cpu_arch_name, "armv7m"
@@ -133,6 +179,50 @@ ENDPROC(__v7m_setup)
 
 	.section ".proc.info.init", #alloc
 
+.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0,  proc_fns = v7m_processor_functions
+	.long	0			/* proc_info_list.__cpu_mm_mmu_flags */
+	.long	0			/* proc_info_list.__cpu_io_mmu_flags */
+	initfn	\initfunc, \name
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps
+	.long	cpu_v7m_name
+	.long   \proc_fns
+	.long	0			/* proc_info_list.tlb */
+	.long	0			/* proc_info_list.user */
+	.long	\cache_fns
+.endm
+
+	/*
+	 * Match ARM Cortex-M7 processor.
+	 */
+	.type	__v7m_cm7_proc_info, #object
+__v7m_cm7_proc_info:
+	.long	0x410fc270		/* ARM Cortex-M7 0xC27 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions
+	.size	__v7m_cm7_proc_info, . - __v7m_cm7_proc_info
+
+	/*
+	 * Match ARM Cortex-M4 processor.
+	 */
+	.type	__v7m_cm4_proc_info, #object
+__v7m_cm4_proc_info:
+	.long	0x410fc240		/* ARM Cortex-M4 0xC24 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP
+	.size	__v7m_cm4_proc_info, . - __v7m_cm4_proc_info
+
+	/*
+	 * Match ARM Cortex-M3 processor.
+	 */
+	.type	__v7m_cm3_proc_info, #object
+__v7m_cm3_proc_info:
+	.long	0x410fc230		/* ARM Cortex-M3 0xC23 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm3_proc_info, __v7m_setup
+	.size	__v7m_cm3_proc_info, . - __v7m_cm3_proc_info
+
 	/*
 	 * Match any ARMv7-M processor core.
 	 */
@@ -140,16 +230,6 @@ ENDPROC(__v7m_setup)
 __v7m_proc_info:
 	.long	0x000f0000		@ Required ID value
 	.long	0x000f0000		@ Mask for ID
-	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
-	.long   0			@ proc_info_list.__cpu_io_mmu_flags
-	initfn	__v7m_setup, __v7m_proc_info	@ proc_info_list.__cpu_flush
-	.long	cpu_arch_name
-	.long	cpu_elf_name
-	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
-	.long	cpu_v7m_name
-	.long	v7m_processor_functions	@ proc_info_list.proc
-	.long	0			@ proc_info_list.tlb
-	.long	0			@ proc_info_list.user
-	.long	nop_cache_fns		@ proc_info_list.cache
+	__v7m_proc __v7m_proc_info, __v7m_setup
 	.size	__v7m_proc_info, . - __v7m_proc_info
 
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index a5b5c87e2114..a56fa2a1e9aa 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -19,6 +19,7 @@
 #include <linux/amba/bus.h>
 #include <linux/sizes.h>
 #include <linux/limits.h>
+#include <linux/clk/clk-conf.h>
 
 #include <asm/irq.h>
 
@@ -237,6 +238,10 @@ static int amba_probe(struct device *dev)
 	int ret;
 
 	do {
+		ret = of_clk_set_defaults(dev->of_node, false);
+		if (ret < 0)
+			break;
+
 		ret = dev_pm_domain_attach(dev, true);
 		if (ret == -EPROBE_DEFER)
 			break;
diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c
index b5befc211172..2bac9b6cfeea 100644
--- a/drivers/cpufreq/sa1110-cpufreq.c
+++ b/drivers/cpufreq/sa1110-cpufreq.c
@@ -159,7 +159,7 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz,
 	 * half speed or use delayed read latching (errata 13).
 	 */
 	if ((ns_to_cycles(sdram->tck, sd_khz) > 1) ||
-	    (CPU_REVISION < CPU_SA1110_B2 && sd_khz < 62000))
+	    (read_cpuid_revision() < ARM_CPU_REV_SA1110_B2 && sd_khz < 62000))
 		sd_khz /= 2;
 
 	sd->mdcnfg = MDCNFG & 0x007f007f;
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index c7efddf6e038..4c09d93d9569 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -89,7 +89,7 @@ config BINFMT_SCRIPT
 
 config BINFMT_FLAT
 	bool "Kernel support for flat binaries"
-	depends on !MMU || M68K
+	depends on !MMU || ARM || M68K
 	depends on !FRV || BROKEN
 	help
 	  Support uClinux FLAT format binaries.
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7b6c446ee17f..a8ffc405f915 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -114,6 +114,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
+	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_NOTIFY_ONLINE,