36 files changed, 4065 insertions, 940 deletions
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index a1d4af6df3f5..1a82f3a17681 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -7,21 +7,24 @@ menu "Bus devices"
 config ARM_CCI
 	bool
 
+config ARM_CCI_PMU
+	bool
+	select ARM_CCI
+
 config ARM_CCI400_COMMON
 	bool
 	select ARM_CCI
 
 config ARM_CCI400_PMU
 	bool "ARM CCI400 PMU support"
-	default y
-	depends on ARM || ARM64
-	depends on HW_PERF_EVENTS
+	depends on (ARM && CPU_V7) || ARM64
+	depends on PERF_EVENTS
 	select ARM_CCI400_COMMON
+	select ARM_CCI_PMU
 	help
-	  Support for PMU events monitoring on the ARM CCI cache coherent
-	  interconnect.
-
-	  If unsure, say Y
+	  Support for PMU events monitoring on the ARM CCI-400 (cache coherent
+	  interconnect). CCI-400 supports counting events related to the
+	  connected slave/master interfaces.
 
 config ARM_CCI400_PORT_CTRL
 	bool
@@ -31,6 +34,20 @@ config ARM_CCI400_PORT_CTRL
 	  Low level power management driver for CCI400 cache coherent
 	  interconnect for ARM platforms.
 
+config ARM_CCI500_PMU
+	bool "ARM CCI500 PMU support"
+	default y
+	depends on (ARM && CPU_V7) || ARM64
+	depends on PERF_EVENTS
+	select ARM_CCI_PMU
+	help
+	  Support for PMU events monitoring on the ARM CCI-500 cache coherent
+	  interconnect. CCI-500 provides 8 independent event counters, which
+	  can count events pertaining to the slave/master interfaces as well
+	  as the internal events to the CCI.
+
+	  If unsure, say Y
+
 config ARM_CCN
 	bool "ARM CCN driver support"
 	depends on ARM || ARM64
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 5340604b23a4..577cc4bf6a9d 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -52,12 +52,15 @@ static const struct of_device_id arm_cci_matches[] = {
 #ifdef CONFIG_ARM_CCI400_COMMON
 	{.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA },
 #endif
+#ifdef CONFIG_ARM_CCI500_PMU
+	{ .compatible = "arm,cci-500", },
+#endif
 	{},
 };
 
-#ifdef CONFIG_ARM_CCI400_PMU
+#ifdef CONFIG_ARM_CCI_PMU
 
-#define DRIVER_NAME		"CCI-400"
+#define DRIVER_NAME		"ARM-CCI"
 #define DRIVER_NAME_PMU		DRIVER_NAME " PMU"
 
 #define CCI_PMCR		0x0100
@@ -77,20 +80,21 @@ static const struct of_device_id arm_cci_matches[] = {
 
 #define CCI_PMU_OVRFLW_FLAG	1
 
-#define CCI_PMU_CNTR_BASE(idx)	((idx) * SZ_4K)
-
-#define CCI_PMU_CNTR_MASK	((1ULL << 32) -1)
+#define CCI_PMU_CNTR_SIZE(model)	((model)->cntr_size)
+#define CCI_PMU_CNTR_BASE(model, idx)	((idx) * CCI_PMU_CNTR_SIZE(model))
+#define CCI_PMU_CNTR_MASK		((1ULL << 32) -1)
+#define CCI_PMU_CNTR_LAST(cci_pmu)	(cci_pmu->num_cntrs - 1)
 
-#define CCI_PMU_EVENT_MASK		0xffUL
-#define CCI_PMU_EVENT_SOURCE(event)	((event >> 5) & 0x7)
-#define CCI_PMU_EVENT_CODE(event)	(event & 0x1f)
-
-#define CCI_PMU_MAX_HW_EVENTS 5   /* CCI PMU has 4 counters + 1 cycle counter */
+#define CCI_PMU_MAX_HW_CNTRS(model) \
+	((model)->num_hw_cntrs + (model)->fixed_hw_cntrs)
 
 /* Types of interfaces that can generate events */
 enum {
 	CCI_IF_SLAVE,
 	CCI_IF_MASTER,
+#ifdef CONFIG_ARM_CCI500_PMU
+	CCI_IF_GLOBAL,
+#endif
 	CCI_IF_MAX,
 };
 
@@ -100,14 +104,30 @@ struct event_range {
 };
 
 struct cci_pmu_hw_events {
-	struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
-	unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
+	struct perf_event **events;
+	unsigned long *used_mask;
 	raw_spinlock_t pmu_lock;
 };
 
+struct cci_pmu;
+/*
+ * struct cci_pmu_model:
+ * @fixed_hw_cntrs - Number of fixed event counters
+ * @num_hw_cntrs - Maximum number of programmable event counters
+ * @cntr_size - Size of an event counter mapping
+ */
 struct cci_pmu_model {
 	char *name;
+	u32 fixed_hw_cntrs;
+	u32 num_hw_cntrs;
+	u32 cntr_size;
+	u64 nformat_attrs;
+	u64 nevent_attrs;
+	struct dev_ext_attribute *format_attrs;
+	struct dev_ext_attribute *event_attrs;
 	struct event_range event_ranges[CCI_IF_MAX];
+	int (*validate_hw_event)(struct cci_pmu *, unsigned long);
+	int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
 };
 
 static struct cci_pmu_model cci_pmu_models[];
@@ -116,33 +136,59 @@ struct cci_pmu {
 	void __iomem *base;
 	struct pmu pmu;
 	int nr_irqs;
-	int irqs[CCI_PMU_MAX_HW_EVENTS];
+	int *irqs;
 	unsigned long active_irqs;
 	const struct cci_pmu_model *model;
 	struct cci_pmu_hw_events hw_events;
 	struct platform_device *plat_device;
-	int num_events;
+	int num_cntrs;
 	atomic_t active_events;
 	struct mutex reserve_mutex;
+	struct notifier_block cpu_nb;
 	cpumask_t cpus;
 };
-static struct cci_pmu *pmu;
 
 #define to_cci_pmu(c)	(container_of(c, struct cci_pmu, pmu))
 
+enum cci_models {
+#ifdef CONFIG_ARM_CCI400_PMU
+	CCI400_R0,
+	CCI400_R1,
+#endif
+#ifdef CONFIG_ARM_CCI500_PMU
+	CCI500_R0,
+#endif
+	CCI_MODEL_MAX
+};
+
+static ssize_t cci_pmu_format_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+static ssize_t cci_pmu_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+
+#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
+	{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config }
+
+#define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
+#define CCI_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci_pmu_event_show, (unsigned long)_config)
+
+/* CCI400 PMU Specific definitions */
+
+#ifdef CONFIG_ARM_CCI400_PMU
+
 /* Port ids */
-#define CCI_PORT_S0	0
-#define CCI_PORT_S1	1
-#define CCI_PORT_S2	2
-#define CCI_PORT_S3	3
-#define CCI_PORT_S4	4
-#define CCI_PORT_M0	5
-#define CCI_PORT_M1	6
-#define CCI_PORT_M2	7
-
-#define CCI_REV_R0		0
-#define CCI_REV_R1		1
-#define CCI_REV_R1_PX		5
+#define CCI400_PORT_S0		0
+#define CCI400_PORT_S1		1
+#define CCI400_PORT_S2		2
+#define CCI400_PORT_S3		3
+#define CCI400_PORT_S4		4
+#define CCI400_PORT_M0		5
+#define CCI400_PORT_M1		6
+#define CCI400_PORT_M2		7
+
+#define CCI400_R1_PX		5
 
 /*
  * Instead of an event id to monitor CCI cycles, a dedicated counter is
@@ -150,12 +196,11 @@ static struct cci_pmu *pmu;
  * make use of this event in hardware.
  */
 enum cci400_perf_events {
-	CCI_PMU_CYCLES = 0xff
+	CCI400_PMU_CYCLES = 0xff
 };
 
-#define CCI_PMU_CYCLE_CNTR_IDX		0
-#define CCI_PMU_CNTR0_IDX		1
-#define CCI_PMU_CNTR_LAST(cci_pmu)	(CCI_PMU_CYCLE_CNTR_IDX + cci_pmu->num_events - 1)
+#define CCI400_PMU_CYCLE_CNTR_IDX	0
+#define CCI400_PMU_CNTR0_IDX		1
 
 /*
  * CCI PMU event id is an 8-bit value made of two parts - bits 7:5 for one of 8
@@ -169,37 +214,173 @@ enum cci400_perf_events {
  * the different revisions and are used to validate the event to be monitored.
  */
 
-#define CCI_REV_R0_SLAVE_PORT_MIN_EV	0x00
-#define CCI_REV_R0_SLAVE_PORT_MAX_EV	0x13
-#define CCI_REV_R0_MASTER_PORT_MIN_EV	0x14
-#define CCI_REV_R0_MASTER_PORT_MAX_EV	0x1a
+#define CCI400_PMU_EVENT_MASK		0xffUL
+#define CCI400_PMU_EVENT_SOURCE_SHIFT	5
+#define CCI400_PMU_EVENT_SOURCE_MASK	0x7
+#define CCI400_PMU_EVENT_CODE_SHIFT	0
+#define CCI400_PMU_EVENT_CODE_MASK	0x1f
+#define CCI400_PMU_EVENT_SOURCE(event) \
+	((event >> CCI400_PMU_EVENT_SOURCE_SHIFT) & \
+			CCI400_PMU_EVENT_SOURCE_MASK)
+#define CCI400_PMU_EVENT_CODE(event) \
+	((event >> CCI400_PMU_EVENT_CODE_SHIFT) & CCI400_PMU_EVENT_CODE_MASK)
+
+#define CCI400_R0_SLAVE_PORT_MIN_EV	0x00
+#define CCI400_R0_SLAVE_PORT_MAX_EV	0x13
+#define CCI400_R0_MASTER_PORT_MIN_EV	0x14
+#define CCI400_R0_MASTER_PORT_MAX_EV	0x1a
+
+#define CCI400_R1_SLAVE_PORT_MIN_EV	0x00
+#define CCI400_R1_SLAVE_PORT_MAX_EV	0x14
+#define CCI400_R1_MASTER_PORT_MIN_EV	0x00
+#define CCI400_R1_MASTER_PORT_MAX_EV	0x11
+
+#define CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci400_pmu_cycle_event_show, \
+					(unsigned long)_config)
+
+static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf);
+
+static struct dev_ext_attribute cci400_pmu_format_attrs[] = {
+	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
+	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
+};
+
+static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x14),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_addr_hazard, 0x15),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_id_hazard, 0x16),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_tt_full, 0x17),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x18),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x19),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
+	/* Special event for cycles counter */
+	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+};
+
+static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_normal_or_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_inner_or_outershareable, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maintenance, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_mem_barrier, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_sync_barrier, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg_sync, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_tt_full, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_last_hs_snoop, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall_rvalids_h_rready_l, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_any, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_device, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_normal_or_nonshareable, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_inner_or_outershare_wback_wclean, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_unique, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_write_line_unique, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_evict, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall_tt_full, 0x13),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_slave_id_hazard, 0x14),
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_retry_speculative_fetch, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_stall_cycle_addr_hazard, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_master_id_hazard, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_hi_prio_rtq_full, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_barrier_hazard, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_barrier_hazard, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_wtq_full, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_low_prio_rtq_full, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_mid_prio_rtq_full, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn0, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn1, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn2, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall_qvn_vn3, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn0, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn1, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn2, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_qvn_vn3, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
+	/* Special event for cycles counter */
+	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+};
 
-#define CCI_REV_R1_SLAVE_PORT_MIN_EV	0x00
-#define CCI_REV_R1_SLAVE_PORT_MAX_EV	0x14
-#define CCI_REV_R1_MASTER_PORT_MIN_EV	0x00
-#define CCI_REV_R1_MASTER_PORT_MAX_EV	0x11
+static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var);
+}
 
-static int pmu_validate_hw_event(unsigned long hw_event)
+static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
+				struct cci_pmu_hw_events *hw,
+				unsigned long cci_event)
 {
-	u8 ev_source = CCI_PMU_EVENT_SOURCE(hw_event);
-	u8 ev_code = CCI_PMU_EVENT_CODE(hw_event);
+	int idx;
+
+	/* cycles event idx is fixed */
+	if (cci_event == CCI400_PMU_CYCLES) {
+		if (test_and_set_bit(CCI400_PMU_CYCLE_CNTR_IDX, hw->used_mask))
+			return -EAGAIN;
+
+		return CCI400_PMU_CYCLE_CNTR_IDX;
+	}
+
+	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
+		if (!test_and_set_bit(idx, hw->used_mask))
+			return idx;
+
+	/* No counters available */
+	return -EAGAIN;
+}
+
+static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
+{
+	u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
+	u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
 	int if_type;
 
-	if (hw_event & ~CCI_PMU_EVENT_MASK)
+	if (hw_event & ~CCI400_PMU_EVENT_MASK)
 		return -ENOENT;
 
+	if (hw_event == CCI400_PMU_CYCLES)
+		return hw_event;
+
 	switch (ev_source) {
-	case CCI_PORT_S0:
-	case CCI_PORT_S1:
-	case CCI_PORT_S2:
-	case CCI_PORT_S3:
-	case CCI_PORT_S4:
+	case CCI400_PORT_S0:
+	case CCI400_PORT_S1:
+	case CCI400_PORT_S2:
+	case CCI400_PORT_S3:
+	case CCI400_PORT_S4:
 		/* Slave Interface */
 		if_type = CCI_IF_SLAVE;
 		break;
-	case CCI_PORT_M0:
-	case CCI_PORT_M1:
-	case CCI_PORT_M2:
+	case CCI400_PORT_M0:
+	case CCI400_PORT_M1:
+	case CCI400_PORT_M2:
 		/* Master Interface */
 		if_type = CCI_IF_MASTER;
 		break;
@@ -207,87 +388,291 @@ static int pmu_validate_hw_event(unsigned long hw_event)
 		return -ENOENT;
 	}
 
-	if (ev_code >= pmu->model->event_ranges[if_type].min &&
-		ev_code <= pmu->model->event_ranges[if_type].max)
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
 		return hw_event;
 
 	return -ENOENT;
 }
 
-static int probe_cci_revision(void)
+static int probe_cci400_revision(void)
 {
 	int rev;
 	rev = readl_relaxed(cci_ctrl_base + CCI_PID2) & CCI_PID2_REV_MASK;
 	rev >>= CCI_PID2_REV_SHIFT;
 
-	if (rev < CCI_REV_R1_PX)
-		return CCI_REV_R0;
+	if (rev < CCI400_R1_PX)
+		return CCI400_R0;
 	else
-		return CCI_REV_R1;
+		return CCI400_R1;
 }
 
 static const struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
 {
 	if (platform_has_secure_cci_access())
-		return &cci_pmu_models[probe_cci_revision()];
+		return &cci_pmu_models[probe_cci400_revision()];
 	return NULL;
 }
+#else	/* !CONFIG_ARM_CCI400_PMU */
+static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif	/* CONFIG_ARM_CCI400_PMU */
+
+#ifdef CONFIG_ARM_CCI500_PMU
+
+/*
+ * CCI500 provides 8 independent event counters that can count
+ * any of the events available.
+ *
+ * CCI500 PMU event id is an 9-bit value made of two parts.
+ *	 bits [8:5] - Source for the event
+ *		      0x0-0x6 - Slave interfaces
+ *		      0x8-0xD - Master interfaces
+ *		      0xf     - Global Events
+ *		      0x7,0xe - Reserved
+ *
+ *	 bits [4:0] - Event code (specific to type of interface)
+ */
+
+/* Port ids */
+#define CCI500_PORT_S0			0x0
+#define CCI500_PORT_S1			0x1
+#define CCI500_PORT_S2			0x2
+#define CCI500_PORT_S3			0x3
+#define CCI500_PORT_S4			0x4
+#define CCI500_PORT_S5			0x5
+#define CCI500_PORT_S6			0x6
+
+#define CCI500_PORT_M0			0x8
+#define CCI500_PORT_M1			0x9
+#define CCI500_PORT_M2			0xa
+#define CCI500_PORT_M3			0xb
+#define CCI500_PORT_M4			0xc
+#define CCI500_PORT_M5			0xd
+
+#define CCI500_PORT_GLOBAL 		0xf
+
+#define CCI500_PMU_EVENT_MASK		0x1ffUL
+#define CCI500_PMU_EVENT_SOURCE_SHIFT	0x5
+#define CCI500_PMU_EVENT_SOURCE_MASK	0xf
+#define CCI500_PMU_EVENT_CODE_SHIFT	0x0
+#define CCI500_PMU_EVENT_CODE_MASK	0x1f
+
+#define CCI500_PMU_EVENT_SOURCE(event)	\
+	((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK)
+#define CCI500_PMU_EVENT_CODE(event)	\
+	((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK)
+
+#define CCI500_SLAVE_PORT_MIN_EV	0x00
+#define CCI500_SLAVE_PORT_MAX_EV	0x1f
+#define CCI500_MASTER_PORT_MIN_EV	0x00
+#define CCI500_MASTER_PORT_MAX_EV	0x06
+#define CCI500_GLOBAL_PORT_MIN_EV	0x00
+#define CCI500_GLOBAL_PORT_MAX_EV	0x0f
+
+
+#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \
+					(unsigned long) _config)
+
+static ssize_t cci500_pmu_global_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf);
+
+static struct dev_ext_attribute cci500_pmu_format_attrs[] = {
+	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
+	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
+};
+
+static struct dev_ext_attribute cci500_pmu_event_attrs[] = {
+	/* Slave events */
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_nonshareable, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_non_alloc, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_shareable_alloc, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_invalidate, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_cache_maint, 0x6),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_dvm_msg, 0x7),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rval, 0x8),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_rlast_snoop, 0x9),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_hs_awalid, 0xA),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_dev, 0xB),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_non_shareable, 0xC),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wb, 0xD),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wlu, 0xE),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_share_wunique, 0xF),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_evict, 0x10),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_wrevict, 0x11),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_beat, 0x12),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_acvalid, 0x13),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_read, 0x14),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_clean, 0x15),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_data_transfer_low, 0x16),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_stall_arvalid, 0x17),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_data_stall, 0x18),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_wrq_stall, 0x19),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_data_stall, 0x1A),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_w_resp_stall, 0x1B),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_srq_stall, 0x1C),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_s_data_stall, 0x1D),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_rq_stall_ot_limit, 0x1E),
+	CCI_EVENT_EXT_ATTR_ENTRY(si_r_stall_arbit, 0x1F),
+
+	/* Master events */
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_beat_any, 0x0),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_beat_any, 0x1),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_rrq_stall, 0x2),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_r_data_stall, 0x3),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall, 0x4),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_data_stall, 0x5),
+	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
+
+	/* Global events */
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
+	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
+};
+
+static ssize_t cci500_pmu_global_event_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	/* Global events have single fixed source code */
+	return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
+				(unsigned long)eattr->var, CCI500_PORT_GLOBAL);
+}
+
+static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
+					unsigned long hw_event)
+{
+	u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event);
+	u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event);
+	int if_type;
+
+	if (hw_event & ~CCI500_PMU_EVENT_MASK)
+		return -ENOENT;
+
+	switch (ev_source) {
+	case CCI500_PORT_S0:
+	case CCI500_PORT_S1:
+	case CCI500_PORT_S2:
+	case CCI500_PORT_S3:
+	case CCI500_PORT_S4:
+	case CCI500_PORT_S5:
+	case CCI500_PORT_S6:
+		if_type = CCI_IF_SLAVE;
+		break;
+	case CCI500_PORT_M0:
+	case CCI500_PORT_M1:
+	case CCI500_PORT_M2:
+	case CCI500_PORT_M3:
+	case CCI500_PORT_M4:
+	case CCI500_PORT_M5:
+		if_type = CCI_IF_MASTER;
+		break;
+	case CCI500_PORT_GLOBAL:
+		if_type = CCI_IF_GLOBAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
+		return hw_event;
+
+	return -ENOENT;
+}
+#endif	/* CONFIG_ARM_CCI500_PMU */
+
+static ssize_t cci_pmu_format_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t cci_pmu_event_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr = container_of(attr,
+				struct dev_ext_attribute, attr);
+	/* source parameter is mandatory for normal PMU events */
+	return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n",
+					 (unsigned long)eattr->var);
+}
 
 static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
 {
-	return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
-		idx <= CCI_PMU_CNTR_LAST(cci_pmu);
+	return 0 <= idx && idx <= CCI_PMU_CNTR_LAST(cci_pmu);
 }
 
-static u32 pmu_read_register(int idx, unsigned int offset)
+static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offset)
 {
-	return readl_relaxed(pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+	return readl_relaxed(cci_pmu->base +
+			     CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
 }
 
-static void pmu_write_register(u32 value, int idx, unsigned int offset)
+static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
+			       int idx, unsigned int offset)
 {
-	return writel_relaxed(value, pmu->base + CCI_PMU_CNTR_BASE(idx) + offset);
+	return writel_relaxed(value, cci_pmu->base +
+			      CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
 }
 
-static void pmu_disable_counter(int idx)
+static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
 {
-	pmu_write_register(0, idx, CCI_PMU_CNTR_CTRL);
+	pmu_write_register(cci_pmu, 0, idx, CCI_PMU_CNTR_CTRL);
 }
 
-static void pmu_enable_counter(int idx)
+static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
 {
-	pmu_write_register(1, idx, CCI_PMU_CNTR_CTRL);
+	pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
 }
 
-static void pmu_set_event(int idx, unsigned long event)
+static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
 {
-	pmu_write_register(event, idx, CCI_PMU_EVT_SEL);
+	pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
 }
 
+/*
+ * Returns the number of programmable counters actually implemented
+ * by the cci
+ */
 static u32 pmu_get_max_counters(void)
 {
-	u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
-		      CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
-
-	/* add 1 for cycle counter */
-	return n_cnts + 1;
+	return (readl_relaxed(cci_ctrl_base + CCI_PMCR) &
+		CCI_PMCR_NCNT_MASK) >> CCI_PMCR_NCNT_SHIFT;
 }
 
 static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
 {
 	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
-	struct hw_perf_event *hw_event = &event->hw;
-	unsigned long cci_event = hw_event->config_base;
+	unsigned long cci_event = event->hw.config_base;
 	int idx;
 
-	if (cci_event == CCI_PMU_CYCLES) {
-		if (test_and_set_bit(CCI_PMU_CYCLE_CNTR_IDX, hw->used_mask))
-			return -EAGAIN;
+	if (cci_pmu->model->get_event_idx)
+		return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
 
-		return CCI_PMU_CYCLE_CNTR_IDX;
-	}
-
-	for (idx = CCI_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
+	/* Generic code to find an unused idx from the mask */
+	for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
 		if (!test_and_set_bit(idx, hw->used_mask))
 			return idx;
 
@@ -297,18 +682,13 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
 
 static int pmu_map_event(struct perf_event *event)
 {
-	int mapping;
-	unsigned long config = event->attr.config;
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
 
-	if (event->attr.type < PERF_TYPE_MAX)
+	if (event->attr.type < PERF_TYPE_MAX ||
+			!cci_pmu->model->validate_hw_event)
 		return -ENOENT;
 
-	if (config == CCI_PMU_CYCLES)
-		mapping = config;
-	else
-		mapping = pmu_validate_hw_event(config);
-
-	return mapping;
+	return	cci_pmu->model->validate_hw_event(cci_pmu, event->attr.config);
 }
 
 static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
@@ -319,7 +699,7 @@ static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
 	if (unlikely(!pmu_device))
 		return -ENODEV;
 
-	if (pmu->nr_irqs < 1) {
+	if (cci_pmu->nr_irqs < 1) {
 		dev_err(&pmu_device->dev, "no irqs for CCI PMUs defined\n");
 		return -ENODEV;
 	}
@@ -331,16 +711,16 @@ static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
 	 *
 	 * This should allow handling of non-unique interrupt for the counters.
 	 */
-	for (i = 0; i < pmu->nr_irqs; i++) {
-		int err = request_irq(pmu->irqs[i], handler, IRQF_SHARED,
+	for (i = 0; i < cci_pmu->nr_irqs; i++) {
+		int err = request_irq(cci_pmu->irqs[i], handler, IRQF_SHARED,
 				"arm-cci-pmu", cci_pmu);
 		if (err) {
 			dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
-				pmu->irqs[i]);
+				cci_pmu->irqs[i]);
 			return err;
 		}
 
-		set_bit(i, &pmu->active_irqs);
+		set_bit(i, &cci_pmu->active_irqs);
 	}
 
 	return 0;
@@ -350,11 +730,11 @@ static void pmu_free_irq(struct cci_pmu *cci_pmu)
 {
 	int i;
 
-	for (i = 0; i < pmu->nr_irqs; i++) {
-		if (!test_and_clear_bit(i, &pmu->active_irqs))
+	for (i = 0; i < cci_pmu->nr_irqs; i++) {
+		if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
 			continue;
 
-		free_irq(pmu->irqs[i], cci_pmu);
+		free_irq(cci_pmu->irqs[i], cci_pmu);
 	}
 }
 
@@ -369,7 +749,7 @@ static u32 pmu_read_counter(struct perf_event *event)
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 		return 0;
 	}
-	value = pmu_read_register(idx, CCI_PMU_CNTR);
+	value = pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR);
 
 	return value;
 }
@@ -383,7 +763,7 @@ static void pmu_write_counter(struct perf_event *event, u32 value)
 	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
 		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
 	else
-		pmu_write_register(value, idx, CCI_PMU_CNTR);
+		pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
 }
 
 static u64 pmu_event_update(struct perf_event *event)
@@ -427,7 +807,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long flags;
 	struct cci_pmu *cci_pmu = dev;
-	struct cci_pmu_hw_events *events = &pmu->hw_events;
+	struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
 	int idx, handled = IRQ_NONE;
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
@@ -436,7 +816,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 	 * This should work regardless of whether we have per-counter overflow
 	 * interrupt or a combined overflow interrupt.
 	 */
-	for (idx = CCI_PMU_CYCLE_CNTR_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
+	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
 		struct perf_event *event = events->events[idx];
 		struct hw_perf_event *hw_counter;
 
@@ -446,11 +826,12 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 		hw_counter = &event->hw;
 
 		/* Did this counter overflow? */
-		if (!(pmu_read_register(idx, CCI_PMU_OVRFLW) &
+		if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
 		      CCI_PMU_OVRFLW_FLAG))
 			continue;
 
-		pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
+		pmu_write_register(cci_pmu, CCI_PMU_OVRFLW_FLAG, idx,
+							CCI_PMU_OVRFLW);
 
 		pmu_event_update(event);
 		pmu_event_set_period(event);
@@ -492,7 +873,7 @@ static void cci_pmu_enable(struct pmu *pmu)
 {
 	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
 	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
-	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events);
+	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
 	unsigned long flags;
 	u32 val;
 
@@ -523,6 +904,16 @@ static void cci_pmu_disable(struct pmu *pmu)
 	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 }
 
+/*
+ * Check if the idx represents a non-programmable counter.
+ * All the fixed event counters are mapped before the programmable
+ * counters.
+ */
+static bool pmu_fixed_hw_idx(struct cci_pmu *cci_pmu, int idx)
+{
+	return (idx >= 0) && (idx < cci_pmu->model->fixed_hw_cntrs);
+}
+
 static void cci_pmu_start(struct perf_event *event, int pmu_flags)
 {
 	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
@@ -547,12 +938,12 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags)
 
 	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
 
-	/* Configure the event to count, unless you are counting cycles */
-	if (idx != CCI_PMU_CYCLE_CNTR_IDX)
-		pmu_set_event(idx, hwc->config_base);
+	/* Configure the counter unless you are counting a fixed event */
+	if (!pmu_fixed_hw_idx(cci_pmu, idx))
+		pmu_set_event(cci_pmu, idx, hwc->config_base);
 
 	pmu_event_set_period(event);
-	pmu_enable_counter(idx);
+	pmu_enable_counter(cci_pmu, idx);
 
 	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 }
@@ -575,7 +966,7 @@ static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
 	 * We always reprogram the counter, so ignore PERF_EF_UPDATE. See
 	 * cci_pmu_start()
 	 */
-	pmu_disable_counter(idx);
+	pmu_disable_counter(cci_pmu, idx);
 	pmu_event_update(event);
 	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 }
@@ -655,13 +1046,16 @@ static int
 validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
+	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
+	unsigned long mask[BITS_TO_LONGS(cci_pmu->num_cntrs)];
 	struct cci_pmu_hw_events fake_pmu = {
 		/*
 		 * Initialise the fake PMU. We only need to populate the
 		 * used_mask for the purposes of validation.
 		 */
-		.used_mask = { 0 },
+		.used_mask = mask,
 	};
+	memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long));
 
 	if (!validate_event(event->pmu, &fake_pmu, leader))
 		return -EINVAL;
@@ -779,20 +1173,27 @@ static int cci_pmu_event_init(struct perf_event *event)
 	return err;
 }
 
-static ssize_t pmu_attr_cpumask_show(struct device *dev,
+static ssize_t pmu_cpumask_attr_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
+	struct dev_ext_attribute *eattr = container_of(attr,
+					struct dev_ext_attribute, attr);
+	struct cci_pmu *cci_pmu = eattr->var;
+
 	int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
-			  cpumask_pr_args(&pmu->cpus));
+			  cpumask_pr_args(&cci_pmu->cpus));
 	buf[n++] = '\n';
 	buf[n] = '\0';
 	return n;
 }
 
-static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL);
+static struct dev_ext_attribute pmu_cpumask_attr = {
+	__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL),
+	NULL,		/* Populated in cci_pmu_init */
+};
 
 static struct attribute *pmu_attrs[] = {
-	&dev_attr_cpumask.attr,
+	&pmu_cpumask_attr.attr.attr,
 	NULL,
 };
 
@@ -800,14 +1201,78 @@ static struct attribute_group pmu_attr_group = {
 	.attrs = pmu_attrs,
 };
 
+static struct attribute_group pmu_format_attr_group = {
+	.name = "format",
+	.attrs = NULL,		/* Filled in cci_pmu_init_attrs */
+};
+
+static struct attribute_group pmu_event_attr_group = {
+	.name = "events",
+	.attrs = NULL,		/* Filled in cci_pmu_init_attrs */
+};
+
 static const struct attribute_group *pmu_attr_groups[] = {
 	&pmu_attr_group,
+	&pmu_format_attr_group,
+	&pmu_event_attr_group,
 	NULL
 };
 
+static struct attribute **alloc_attrs(struct platform_device *pdev,
+				int n, struct dev_ext_attribute *source)
+{
+	int i;
+	struct attribute **attrs;
+
+	/* Alloc n + 1 (for terminating NULL) */
+	attrs  = devm_kcalloc(&pdev->dev, n + 1, sizeof(struct attribute *),
+								GFP_KERNEL);
+	if (!attrs)
+		return attrs;
+	for(i = 0; i < n; i++)
+		attrs[i] = &source[i].attr.attr;
+	return attrs;
+}
+
+static int cci_pmu_init_attrs(struct cci_pmu *cci_pmu, struct platform_device *pdev)
+{
+	const struct cci_pmu_model *model = cci_pmu->model;
+	struct attribute **attrs;
+
+	/*
+	 * All allocations below are managed, hence doesn't need to be
+	 * free'd explicitly in case of an error.
+	 */
+
+	if (model->nevent_attrs) {
+		attrs = alloc_attrs(pdev, model->nevent_attrs,
+						model->event_attrs);
+		if (!attrs)
+			return -ENOMEM;
+		pmu_event_attr_group.attrs = attrs;
+	}
+	if (model->nformat_attrs) {
+		attrs = alloc_attrs(pdev, model->nformat_attrs,
+						 model->format_attrs);
+		if (!attrs)
+			return -ENOMEM;
+		pmu_format_attr_group.attrs = attrs;
+	}
+	pmu_cpumask_attr.var = cci_pmu;
+
+	return 0;
+}
+
 static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
 {
 	char *name = cci_pmu->model->name;
+	u32 num_cntrs;
+	int rc;
+
+	rc = cci_pmu_init_attrs(cci_pmu, pdev);
+	if (rc)
+		return rc;
+
 	cci_pmu->pmu = (struct pmu) {
 		.name		= cci_pmu->model->name,
 		.task_ctx_nr	= perf_invalid_context,
@@ -823,7 +1288,15 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
 	};
 
 	cci_pmu->plat_device = pdev;
-	cci_pmu->num_events = pmu_get_max_counters();
+	num_cntrs = pmu_get_max_counters();
+	if (num_cntrs > cci_pmu->model->num_hw_cntrs) {
+		dev_warn(&pdev->dev,
+			"PMU implements more counters(%d) than supported by"
+			" the model(%d), truncated.",
+			num_cntrs, cci_pmu->model->num_hw_cntrs);
+		num_cntrs = cci_pmu->model->num_hw_cntrs;
+	}
+	cci_pmu->num_cntrs = num_cntrs + cci_pmu->model->fixed_hw_cntrs;
 
 	return perf_pmu_register(&cci_pmu->pmu, name, -1);
 }
@@ -831,12 +1304,14 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
 static int cci_pmu_cpu_notifier(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
+	struct cci_pmu *cci_pmu = container_of(self,
+					struct cci_pmu, cpu_nb);
 	unsigned int cpu = (long)hcpu;
 	unsigned int target;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
-		if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus))
+		if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
 			break;
 		target = cpumask_any_but(cpu_online_mask, cpu);
 		if (target < 0) // UP, last CPU
@@ -845,7 +1320,7 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self,
 		 * TODO: migrate context once core races on event->ctx have
 		 * been fixed.
 		 */
-		cpumask_set_cpu(target, &pmu->cpus);
+		cpumask_set_cpu(target, &cci_pmu->cpus);
 	default:
 		break;
 	}
@@ -853,57 +1328,103 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cci_pmu_cpu_nb = {
-	.notifier_call	= cci_pmu_cpu_notifier,
-	/*
-	 * to migrate uncore events, our notifier should be executed
-	 * before perf core's notifier.
-	 */
-	.priority	= CPU_PRI_PERF + 1,
-};
-
 static struct cci_pmu_model cci_pmu_models[] = {
-	[CCI_REV_R0] = {
+#ifdef CONFIG_ARM_CCI400_PMU
+	[CCI400_R0] = {
 		.name = "CCI_400",
+		.fixed_hw_cntrs = 1,	/* Cycle counter */
+		.num_hw_cntrs = 4,
+		.cntr_size = SZ_4K,
+		.format_attrs = cci400_pmu_format_attrs,
+		.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
+		.event_attrs = cci400_r0_pmu_event_attrs,
+		.nevent_attrs = ARRAY_SIZE(cci400_r0_pmu_event_attrs),
 		.event_ranges = {
 			[CCI_IF_SLAVE] = {
-				CCI_REV_R0_SLAVE_PORT_MIN_EV,
-				CCI_REV_R0_SLAVE_PORT_MAX_EV,
+				CCI400_R0_SLAVE_PORT_MIN_EV,
+				CCI400_R0_SLAVE_PORT_MAX_EV,
 			},
 			[CCI_IF_MASTER] = {
-				CCI_REV_R0_MASTER_PORT_MIN_EV,
-				CCI_REV_R0_MASTER_PORT_MAX_EV,
+				CCI400_R0_MASTER_PORT_MIN_EV,
+				CCI400_R0_MASTER_PORT_MAX_EV,
 			},
 		},
+		.validate_hw_event = cci400_validate_hw_event,
+		.get_event_idx = cci400_get_event_idx,
 	},
-	[CCI_REV_R1] = {
+	[CCI400_R1] = {
 		.name = "CCI_400_r1",
+		.fixed_hw_cntrs = 1,	/* Cycle counter */
+		.num_hw_cntrs = 4,
+		.cntr_size = SZ_4K,
+		.format_attrs = cci400_pmu_format_attrs,
+		.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
+		.event_attrs = cci400_r1_pmu_event_attrs,
+		.nevent_attrs = ARRAY_SIZE(cci400_r1_pmu_event_attrs),
 		.event_ranges = {
 			[CCI_IF_SLAVE] = {
-				CCI_REV_R1_SLAVE_PORT_MIN_EV,
-				CCI_REV_R1_SLAVE_PORT_MAX_EV,
+				CCI400_R1_SLAVE_PORT_MIN_EV,
+				CCI400_R1_SLAVE_PORT_MAX_EV,
 			},
 			[CCI_IF_MASTER] = {
-				CCI_REV_R1_MASTER_PORT_MIN_EV,
-				CCI_REV_R1_MASTER_PORT_MAX_EV,
+				CCI400_R1_MASTER_PORT_MIN_EV,
+				CCI400_R1_MASTER_PORT_MAX_EV,
 			},
 		},
+		.validate_hw_event = cci400_validate_hw_event,
+		.get_event_idx = cci400_get_event_idx,
 	},
+#endif
+#ifdef CONFIG_ARM_CCI500_PMU
+	[CCI500_R0] = {
+		.name = "CCI_500",
+		.fixed_hw_cntrs = 0,
+		.num_hw_cntrs = 8,
+		.cntr_size = SZ_64K,
+		.format_attrs = cci500_pmu_format_attrs,
+		.nformat_attrs = ARRAY_SIZE(cci500_pmu_format_attrs),
+		.event_attrs = cci500_pmu_event_attrs,
+		.nevent_attrs = ARRAY_SIZE(cci500_pmu_event_attrs),
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI500_SLAVE_PORT_MIN_EV,
+				CCI500_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI500_MASTER_PORT_MIN_EV,
+				CCI500_MASTER_PORT_MAX_EV,
+			},
+			[CCI_IF_GLOBAL] = {
+				CCI500_GLOBAL_PORT_MIN_EV,
+				CCI500_GLOBAL_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci500_validate_hw_event,
+	},
+#endif
 };
 
 static const struct of_device_id arm_cci_pmu_matches[] = {
+#ifdef CONFIG_ARM_CCI400_PMU
 	{
 		.compatible = "arm,cci-400-pmu",
 		.data	= NULL,
 	},
 	{
 		.compatible = "arm,cci-400-pmu,r0",
-		.data	= &cci_pmu_models[CCI_REV_R0],
+		.data	= &cci_pmu_models[CCI400_R0],
 	},
 	{
 		.compatible = "arm,cci-400-pmu,r1",
-		.data	= &cci_pmu_models[CCI_REV_R1],
+		.data	= &cci_pmu_models[CCI400_R1],
+	},
+#endif
+#ifdef CONFIG_ARM_CCI500_PMU
+	{
+		.compatible = "arm,cci-500-pmu,r0",
+		.data = &cci_pmu_models[CCI500_R0],
 	},
+#endif
 	{},
 };
 
@@ -932,68 +1453,114 @@ static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
 	return false;
 }
 
-static int cci_pmu_probe(struct platform_device *pdev)
+static struct cci_pmu *cci_pmu_alloc(struct platform_device *pdev)
 {
-	struct resource *res;
-	int i, ret, irq;
+	struct cci_pmu *cci_pmu;
 	const struct cci_pmu_model *model;
 
+	/*
+	 * All allocations are devm_* hence we don't have to free
+	 * them explicitly on an error, as it would end up in driver
+	 * detach.
+	 */
 	model = get_cci_model(pdev);
 	if (!model) {
 		dev_warn(&pdev->dev, "CCI PMU version not supported\n");
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 	}
 
-	pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
-	if (!pmu)
-		return -ENOMEM;
+	cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*cci_pmu), GFP_KERNEL);
+	if (!cci_pmu)
+		return ERR_PTR(-ENOMEM);
+
+	cci_pmu->model = model;
+	cci_pmu->irqs = devm_kcalloc(&pdev->dev, CCI_PMU_MAX_HW_CNTRS(model),
+					sizeof(*cci_pmu->irqs), GFP_KERNEL);
+	if (!cci_pmu->irqs)
+		return ERR_PTR(-ENOMEM);
+	cci_pmu->hw_events.events = devm_kcalloc(&pdev->dev,
+					     CCI_PMU_MAX_HW_CNTRS(model),
+					     sizeof(*cci_pmu->hw_events.events),
+					     GFP_KERNEL);
+	if (!cci_pmu->hw_events.events)
+		return ERR_PTR(-ENOMEM);
+	cci_pmu->hw_events.used_mask = devm_kcalloc(&pdev->dev,
+						BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)),
+						sizeof(*cci_pmu->hw_events.used_mask),
+						GFP_KERNEL);
+	if (!cci_pmu->hw_events.used_mask)
+		return ERR_PTR(-ENOMEM);
+
+	return cci_pmu;
+}
+
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct cci_pmu *cci_pmu;
+	int i, ret, irq;
+
+	cci_pmu = cci_pmu_alloc(pdev);
+	if (IS_ERR(cci_pmu))
+		return PTR_ERR(cci_pmu);
 
-	pmu->model = model;
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pmu->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(pmu->base))
+	cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cci_pmu->base))
 		return -ENOMEM;
 
 	/*
-	 * CCI PMU has 5 overflow signals - one per counter; but some may be tied
+	 * CCI PMU has one overflow interrupt per counter; but some may be tied
 	 * together to a common interrupt.
 	 */
-	pmu->nr_irqs = 0;
-	for (i = 0; i < CCI_PMU_MAX_HW_EVENTS; i++) {
+	cci_pmu->nr_irqs = 0;
+	for (i = 0; i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model); i++) {
 		irq = platform_get_irq(pdev, i);
 		if (irq < 0)
 			break;
 
-		if (is_duplicate_irq(irq, pmu->irqs, pmu->nr_irqs))
+		if (is_duplicate_irq(irq, cci_pmu->irqs, cci_pmu->nr_irqs))
 			continue;
 
-		pmu->irqs[pmu->nr_irqs++] = irq;
+		cci_pmu->irqs[cci_pmu->nr_irqs++] = irq;
 	}
 
 	/*
 	 * Ensure that the device tree has as many interrupts as the number
 	 * of counters.
 	 */
-	if (i < CCI_PMU_MAX_HW_EVENTS) {
+	if (i < CCI_PMU_MAX_HW_CNTRS(cci_pmu->model)) {
 		dev_warn(&pdev->dev, "In-correct number of interrupts: %d, should be %d\n",
-			i, CCI_PMU_MAX_HW_EVENTS);
+			i, CCI_PMU_MAX_HW_CNTRS(cci_pmu->model));
 		return -EINVAL;
 	}
 
-	raw_spin_lock_init(&pmu->hw_events.pmu_lock);
-	mutex_init(&pmu->reserve_mutex);
-	atomic_set(&pmu->active_events, 0);
-	cpumask_set_cpu(smp_processor_id(), &pmu->cpus);
+	raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
+	mutex_init(&cci_pmu->reserve_mutex);
+	atomic_set(&cci_pmu->active_events, 0);
+	cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus);
+
+	cci_pmu->cpu_nb = (struct notifier_block) {
+		.notifier_call	= cci_pmu_cpu_notifier,
+		/*
+		 * to migrate uncore events, our notifier should be executed
+		 * before perf core's notifier.
+		 */
+		.priority	= CPU_PRI_PERF + 1,
+	};
 
-	ret = register_cpu_notifier(&cci_pmu_cpu_nb);
+	ret = register_cpu_notifier(&cci_pmu->cpu_nb);
 	if (ret)
 		return ret;
 
-	ret = cci_pmu_init(pmu, pdev);
-	if (ret)
+	ret = cci_pmu_init(cci_pmu, pdev);
+	if (ret) {
+		unregister_cpu_notifier(&cci_pmu->cpu_nb);
 		return ret;
+	}
 
-	pr_info("ARM %s PMU driver probed", pmu->model->name);
+	pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
 	return 0;
 }
 
@@ -1032,14 +1599,14 @@ static int __init cci_platform_init(void)
 	return platform_driver_register(&cci_platform_driver);
 }
 
-#else /* !CONFIG_ARM_CCI400_PMU */
+#else /* !CONFIG_ARM_CCI_PMU */
 
 static int __init cci_platform_init(void)
 {
 	return 0;
 }
 
-#endif /* CONFIG_ARM_CCI400_PMU */
+#endif /* CONFIG_ARM_CCI_PMU */
 
 #ifdef CONFIG_ARM_CCI400_PORT_CTRL
 
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index aaa0f2a87118..7d9879e166cf 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -166,13 +166,17 @@ struct arm_ccn_dt {
 
 	struct hrtimer hrtimer;
 
+	cpumask_t cpu;
+	struct notifier_block cpu_nb;
+
 	struct pmu pmu;
 };
 
 struct arm_ccn {
 	struct device *dev;
 	void __iomem *base;
-	unsigned irq_used:1;
+	unsigned int irq;
+
 	unsigned sbas_present:1;
 	unsigned sbsx_present:1;
 
@@ -212,7 +216,7 @@ static int arm_ccn_node_to_xp_port(int node)
 
 static void arm_ccn_pmu_config_set(u64 *config, u32 node_xp, u32 type, u32 port)
 {
-	*config &= ~((0xff << 0) | (0xff << 8) | (0xff << 24));
+	*config &= ~((0xff << 0) | (0xff << 8) | (0x3 << 24));
 	*config |= (node_xp << 0) | (type << 8) | (port << 24);
 }
 
@@ -336,6 +340,23 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
 	if (event->mask)
 		res += snprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x",
 				event->mask);
+
+	/* Arguments required by an event */
+	switch (event->type) {
+	case CCN_TYPE_CYCLES:
+		break;
+	case CCN_TYPE_XP:
+		res += snprintf(buf + res, PAGE_SIZE - res,
+				",xp=?,port=?,vc=?,dir=?");
+		if (event->event == CCN_EVENT_WATCHPOINT)
+			res += snprintf(buf + res, PAGE_SIZE - res,
+					",cmp_l=?,cmp_h=?,mask=?");
+		break;
+	default:
+		res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
+		break;
+	}
+
 	res += snprintf(buf + res, PAGE_SIZE - res, "\n");
 
 	return res;
@@ -521,6 +542,25 @@ static struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = {
 	.attrs = arm_ccn_pmu_cmp_mask_attrs,
 };
 
+static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &ccn->dt.cpu);
+}
+
+static struct device_attribute arm_ccn_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, arm_ccn_pmu_cpumask_show, NULL);
+
+static struct attribute *arm_ccn_pmu_cpumask_attrs[] = {
+	&arm_ccn_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group arm_ccn_pmu_cpumask_attr_group = {
+	.attrs = arm_ccn_pmu_cpumask_attrs,
+};
 
 /*
  * Default poll period is 10ms, which is way over the top anyway,
@@ -542,6 +582,7 @@ static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
 	&arm_ccn_pmu_events_attr_group,
 	&arm_ccn_pmu_format_attr_group,
 	&arm_ccn_pmu_cmp_mask_attr_group,
+	&arm_ccn_pmu_cpumask_attr_group,
 	NULL
 };
 
@@ -587,7 +628,65 @@ static int arm_ccn_pmu_type_eq(u32 a, u32 b)
 	return 0;
 }
 
-static void arm_ccn_pmu_event_destroy(struct perf_event *event)
+static int arm_ccn_pmu_event_alloc(struct perf_event *event)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u32 node_xp, type, event_id;
+	struct arm_ccn_component *source;
+	int bit;
+
+	node_xp = CCN_CONFIG_NODE(event->attr.config);
+	type = CCN_CONFIG_TYPE(event->attr.config);
+	event_id = CCN_CONFIG_EVENT(event->attr.config);
+
+	/* Allocate the cycle counter */
+	if (type == CCN_TYPE_CYCLES) {
+		if (test_and_set_bit(CCN_IDX_PMU_CYCLE_COUNTER,
+				ccn->dt.pmu_counters_mask))
+			return -EAGAIN;
+
+		hw->idx = CCN_IDX_PMU_CYCLE_COUNTER;
+		ccn->dt.pmu_counters[CCN_IDX_PMU_CYCLE_COUNTER].event = event;
+
+		return 0;
+	}
+
+	/* Allocate an event counter */
+	hw->idx = arm_ccn_pmu_alloc_bit(ccn->dt.pmu_counters_mask,
+			CCN_NUM_PMU_EVENT_COUNTERS);
+	if (hw->idx < 0) {
+		dev_dbg(ccn->dev, "No more counters available!\n");
+		return -EAGAIN;
+	}
+
+	if (type == CCN_TYPE_XP)
+		source = &ccn->xp[node_xp];
+	else
+		source = &ccn->node[node_xp];
+	ccn->dt.pmu_counters[hw->idx].source = source;
+
+	/* Allocate an event source or a watchpoint */
+	if (type == CCN_TYPE_XP && event_id == CCN_EVENT_WATCHPOINT)
+		bit = arm_ccn_pmu_alloc_bit(source->xp.dt_cmp_mask,
+				CCN_NUM_XP_WATCHPOINTS);
+	else
+		bit = arm_ccn_pmu_alloc_bit(source->pmu_events_mask,
+				CCN_NUM_PMU_EVENTS);
+	if (bit < 0) {
+		dev_dbg(ccn->dev, "No more event sources/watchpoints on node/XP %d!\n",
+				node_xp);
+		clear_bit(hw->idx, ccn->dt.pmu_counters_mask);
+		return -EAGAIN;
+	}
+	hw->config_base = bit;
+
+	ccn->dt.pmu_counters[hw->idx].event = event;
+
+	return 0;
+}
+
+static void arm_ccn_pmu_event_release(struct perf_event *event)
 {
 	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 	struct hw_perf_event *hw = &event->hw;
@@ -616,15 +715,14 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 	struct arm_ccn *ccn;
 	struct hw_perf_event *hw = &event->hw;
 	u32 node_xp, type, event_id;
-	int valid, bit;
-	struct arm_ccn_component *source;
+	int valid;
 	int i;
+	struct perf_event *sibling;
 
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
 	ccn = pmu_to_arm_ccn(event->pmu);
-	event->destroy = arm_ccn_pmu_event_destroy;
 
 	if (hw->sample_period) {
 		dev_warn(ccn->dev, "Sampling not supported!\n");
@@ -642,6 +740,16 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 		dev_warn(ccn->dev, "Can't provide per-task data!\n");
 		return -EOPNOTSUPP;
 	}
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, like CCN, where each
+	 * event could be theoretically assigned to a different CPU. To
+	 * mitigate this, we enforce CPU assignment to one, selected
+	 * processor (the one described in the "cpumask" attribute).
+	 */
+	event->cpu = cpumask_first(&ccn->dt.cpu);
 
 	node_xp = CCN_CONFIG_NODE(event->attr.config);
 	type = CCN_CONFIG_TYPE(event->attr.config);
@@ -711,48 +819,20 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 				node_xp, type, port);
 	}
 
-	/* Allocate the cycle counter */
-	if (type == CCN_TYPE_CYCLES) {
-		if (test_and_set_bit(CCN_IDX_PMU_CYCLE_COUNTER,
-				ccn->dt.pmu_counters_mask))
-			return -EAGAIN;
-
-		hw->idx = CCN_IDX_PMU_CYCLE_COUNTER;
-		ccn->dt.pmu_counters[CCN_IDX_PMU_CYCLE_COUNTER].event = event;
-
-		return 0;
-	}
-
-	/* Allocate an event counter */
-	hw->idx = arm_ccn_pmu_alloc_bit(ccn->dt.pmu_counters_mask,
-			CCN_NUM_PMU_EVENT_COUNTERS);
-	if (hw->idx < 0) {
-		dev_warn(ccn->dev, "No more counters available!\n");
-		return -EAGAIN;
-	}
-
-	if (type == CCN_TYPE_XP)
-		source = &ccn->xp[node_xp];
-	else
-		source = &ccn->node[node_xp];
-	ccn->dt.pmu_counters[hw->idx].source = source;
-
-	/* Allocate an event source or a watchpoint */
-	if (type == CCN_TYPE_XP && event_id == CCN_EVENT_WATCHPOINT)
-		bit = arm_ccn_pmu_alloc_bit(source->xp.dt_cmp_mask,
-				CCN_NUM_XP_WATCHPOINTS);
-	else
-		bit = arm_ccn_pmu_alloc_bit(source->pmu_events_mask,
-				CCN_NUM_PMU_EVENTS);
-	if (bit < 0) {
-		dev_warn(ccn->dev, "No more event sources/watchpoints on node/XP %d!\n",
-				node_xp);
-		clear_bit(hw->idx, ccn->dt.pmu_counters_mask);
-		return -EAGAIN;
-	}
-	hw->config_base = bit;
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable (for example to create a CCN group
+	 * periodically read when a hrtimer aka cpu-clock leader triggers).
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
 
-	ccn->dt.pmu_counters[hw->idx].event = event;
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			group_entry)
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
 
 	return 0;
 }
@@ -835,9 +915,14 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags)
 			arm_ccn_pmu_read_counter(ccn, hw->idx));
 	hw->state = 0;
 
-	if (!ccn->irq_used)
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (!ccn->irq)
 		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
-				HRTIMER_MODE_REL);
+				HRTIMER_MODE_REL_PINNED);
 
 	/* Set the DT bus input, engaging the counter */
 	arm_ccn_pmu_xp_dt_config(event, 1);
@@ -852,7 +937,7 @@ static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
 	/* Disable counting, setting the DT bus to pass-through mode */
 	arm_ccn_pmu_xp_dt_config(event, 0);
 
-	if (!ccn->irq_used)
+	if (!ccn->irq)
 		hrtimer_cancel(&ccn->dt.hrtimer);
 
 	/* Let the DT bus drain */
@@ -1014,8 +1099,13 @@ static void arm_ccn_pmu_event_config(struct perf_event *event)
 
 static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 {
+	int err;
 	struct hw_perf_event *hw = &event->hw;
 
+	err = arm_ccn_pmu_event_alloc(event);
+	if (err)
+		return err;
+
 	arm_ccn_pmu_event_config(event);
 
 	hw->state = PERF_HES_STOPPED;
@@ -1029,6 +1119,8 @@ static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
 {
 	arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	arm_ccn_pmu_event_release(event);
 }
 
 static void arm_ccn_pmu_event_read(struct perf_event *event)
@@ -1079,12 +1171,39 @@ static enum hrtimer_restart arm_ccn_pmu_timer_handler(struct hrtimer *hrtimer)
 }
 
 
+static int arm_ccn_pmu_cpu_notifier(struct notifier_block *nb,
+		unsigned long action, void *hcpu)
+{
+	struct arm_ccn_dt *dt = container_of(nb, struct arm_ccn_dt, cpu_nb);
+	struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
+	unsigned int cpu = (long)hcpu; /* for (long) see kernel/cpu.c */
+	unsigned int target;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_PREPARE:
+		if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
+			break;
+		target = cpumask_any_but(cpu_online_mask, cpu);
+		if (target < 0)
+			break;
+		perf_pmu_migrate_context(&dt->pmu, cpu, target);
+		cpumask_set_cpu(target, &dt->cpu);
+		WARN_ON(irq_set_affinity(ccn->irq, &dt->cpu) != 0);
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+
 static DEFINE_IDA(arm_ccn_pmu_ida);
 
 static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 {
 	int i;
 	char *name;
+	int err;
 
 	/* Initialize DT subsystem */
 	ccn->dt.base = ccn->base + CCN_REGION_SIZE;
@@ -1136,20 +1255,58 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 	};
 
 	/* No overflow interrupt? Have to use a timer instead. */
-	if (!ccn->irq_used) {
+	if (!ccn->irq) {
 		dev_info(ccn->dev, "No access to interrupts, using timer.\n");
 		hrtimer_init(&ccn->dt.hrtimer, CLOCK_MONOTONIC,
 				HRTIMER_MODE_REL);
 		ccn->dt.hrtimer.function = arm_ccn_pmu_timer_handler;
 	}
 
-	return perf_pmu_register(&ccn->dt.pmu, name, -1);
+	/* Pick one CPU which we will use to collect data from CCN... */
+	cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu);
+
+	/*
+	 * ... and change the selection when it goes offline. Priority is
+	 * picked to have a chance to migrate events before perf is notified.
+	 */
+	ccn->dt.cpu_nb.notifier_call = arm_ccn_pmu_cpu_notifier;
+	ccn->dt.cpu_nb.priority = CPU_PRI_PERF + 1,
+	err = register_cpu_notifier(&ccn->dt.cpu_nb);
+	if (err)
+		goto error_cpu_notifier;
+
+	/* Also make sure that the overflow interrupt is handled by this CPU */
+	if (ccn->irq) {
+		err = irq_set_affinity(ccn->irq, &ccn->dt.cpu);
+		if (err) {
+			dev_err(ccn->dev, "Failed to set interrupt affinity!\n");
+			goto error_set_affinity;
+		}
+	}
+
+	err = perf_pmu_register(&ccn->dt.pmu, name, -1);
+	if (err)
+		goto error_pmu_register;
+
+	return 0;
+
+error_pmu_register:
+error_set_affinity:
+	unregister_cpu_notifier(&ccn->dt.cpu_nb);
+error_cpu_notifier:
+	ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
+	for (i = 0; i < ccn->num_xps; i++)
+		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
+	writel(0, ccn->dt.base + CCN_DT_PMCR);
+	return err;
 }
 
 static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn)
 {
 	int i;
 
+	irq_set_affinity(ccn->irq, cpu_possible_mask);
+	unregister_cpu_notifier(&ccn->dt.cpu_nb);
 	for (i = 0; i < ccn->num_xps; i++)
 		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
 	writel(0, ccn->dt.base + CCN_DT_PMCR);
@@ -1285,6 +1442,7 @@ static int arm_ccn_probe(struct platform_device *pdev)
 {
 	struct arm_ccn *ccn;
 	struct resource *res;
+	unsigned int irq;
 	int err;
 
 	ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL);
@@ -1309,6 +1467,7 @@ static int arm_ccn_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!res)
 		return -EINVAL;
+	irq = res->start;
 
 	/* Check if we can use the interrupt */
 	writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE,
@@ -1318,13 +1477,12 @@ static int arm_ccn_probe(struct platform_device *pdev)
 		/* Can set 'disable' bits, so can acknowledge interrupts */
 		writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
 				ccn->base + CCN_MN_ERRINT_STATUS);
-		err = devm_request_irq(ccn->dev, res->start,
-				arm_ccn_irq_handler, 0, dev_name(ccn->dev),
-				ccn);
+		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0,
+				dev_name(ccn->dev), ccn);
 		if (err)
 			return err;
 
-		ccn->irq_used = 1;
+		ccn->irq = irq;
 	}
 
 
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index 6f047dcb94c2..c43c3d2baf73 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -57,6 +57,7 @@
 #include <linux/of_address.h>
 #include <linux/debugfs.h>
 #include <linux/log2.h>
+#include <linux/memblock.h>
 #include <linux/syscore_ops.h>
 
 /*
@@ -152,13 +153,39 @@ struct mvebu_mbus_state {
 
 static struct mvebu_mbus_state mbus_state;
 
+/*
+ * We provide two variants of the mv_mbus_dram_info() function:
+ *
+ * - The normal one, where the described DRAM ranges may overlap with
+ *   the I/O windows, but for which the DRAM ranges are guaranteed to
+ *   have a power of two size. Such ranges are suitable for the DMA
+ *   masters that only DMA between the RAM and the device, which is
+ *   actually all devices except the crypto engines.
+ *
+ * - The 'nooverlap' one, where the described DRAM ranges are
+ *   guaranteed to not overlap with the I/O windows, but for which the
+ *   DRAM ranges will not have power of two sizes. They will only be
+ *   aligned on a 64 KB boundary, and have a size multiple of 64
+ *   KB. Such ranges are suitable for the DMA masters that DMA between
+ *   the crypto SRAM (which is mapped through an I/O window) and a
+ *   device. This is the case for the crypto engines.
+ */
+
 static struct mbus_dram_target_info mvebu_mbus_dram_info;
+static struct mbus_dram_target_info mvebu_mbus_dram_info_nooverlap;
+
 const struct mbus_dram_target_info *mv_mbus_dram_info(void)
 {
 	return &mvebu_mbus_dram_info;
 }
 EXPORT_SYMBOL_GPL(mv_mbus_dram_info);
 
+const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void)
+{
+	return &mvebu_mbus_dram_info_nooverlap;
+}
+EXPORT_SYMBOL_GPL(mv_mbus_dram_info_nooverlap);
+
 /* Checks whether the given window has remap capability */
 static bool mvebu_mbus_window_is_remappable(struct mvebu_mbus_state *mbus,
 					    const int win)
@@ -576,6 +603,95 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
 		return MVEBU_MBUS_NO_REMAP;
 }
 
+/*
+ * Use the memblock information to find the MBus bridge hole in the
+ * physical address space.
+ */
+static void __init
+mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
+{
+	struct memblock_region *r;
+	uint64_t s = 0;
+
+	for_each_memblock(memory, r) {
+		/*
+		 * This part of the memory is above 4 GB, so we don't
+		 * care for the MBus bridge hole.
+		 */
+		if (r->base >= 0x100000000ULL)
+			continue;
+
+		/*
+		 * The MBus bridge hole is at the end of the RAM under
+		 * the 4 GB limit.
+		 */
+		if (r->base + r->size > s)
+			s = r->base + r->size;
+	}
+
+	*start = s;
+	*end = 0x100000000ULL;
+}
+
+/*
+ * This function fills in the mvebu_mbus_dram_info_nooverlap data
+ * structure, by looking at the mvebu_mbus_dram_info data, and
+ * removing the parts of it that overlap with I/O windows.
+ */
+static void __init
+mvebu_mbus_setup_cpu_target_nooverlap(struct mvebu_mbus_state *mbus)
+{
+	uint64_t mbus_bridge_base, mbus_bridge_end;
+	int cs_nooverlap = 0;
+	int i;
+
+	mvebu_mbus_find_bridge_hole(&mbus_bridge_base, &mbus_bridge_end);
+
+	for (i = 0; i < mvebu_mbus_dram_info.num_cs; i++) {
+		struct mbus_dram_window *w;
+		u64 base, size, end;
+
+		w = &mvebu_mbus_dram_info.cs[i];
+		base = w->base;
+		size = w->size;
+		end = base + size;
+
+		/*
+		 * The CS is fully enclosed inside the MBus bridge
+		 * area, so ignore it.
+		 */
+		if (base >= mbus_bridge_base && end <= mbus_bridge_end)
+			continue;
+
+		/*
+		 * Beginning of CS overlaps with end of MBus, raise CS
+		 * base address, and shrink its size.
+		 */
+		if (base >= mbus_bridge_base && end > mbus_bridge_end) {
+			size -= mbus_bridge_end - base;
+			base = mbus_bridge_end;
+		}
+
+		/*
+		 * End of CS overlaps with beginning of MBus, shrink
+		 * CS size.
+		 */
+		if (base < mbus_bridge_base && end > mbus_bridge_base)
+			size -= end - mbus_bridge_base;
+
+		w = &mvebu_mbus_dram_info_nooverlap.cs[cs_nooverlap++];
+		w->cs_index = i;
+		w->mbus_attr = 0xf & ~(1 << i);
+		if (mbus->hw_io_coherency)
+			w->mbus_attr |= ATTR_HW_COHERENCY;
+		w->base = base;
+		w->size = size;
+	}
+
+	mvebu_mbus_dram_info_nooverlap.mbus_dram_target_id = TARGET_DDR;
+	mvebu_mbus_dram_info_nooverlap.num_cs = cs_nooverlap;
+}
+
 static void __init
 mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus)
 {
@@ -964,6 +1080,7 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
 		mvebu_mbus_disable_window(mbus, win);
 
 	mbus->soc->setup_cpu_target(mbus);
+	mvebu_mbus_setup_cpu_target_nooverlap(mbus);
 
 	if (is_coherent)
 		writel(UNIT_SYNC_BARRIER_ALL,
diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c
index 515fb133495c..73153fc45ee9 100644
--- a/drivers/clk/berlin/bg2.c
+++ b/drivers/clk/berlin/bg2.c
@@ -502,12 +502,13 @@ static const struct berlin2_gate_data bg2_gates[] __initconst = {
 
 static void __init berlin2_clock_setup(struct device_node *np)
 {
+	struct device_node *parent_np = of_get_parent(np);
 	const char *parent_names[9];
 	struct clk *clk;
 	u8 avpll_flags = 0;
 	int n;
 
-	gbase = of_iomap(np, 0);
+	gbase = of_iomap(parent_np, 0);
 	if (!gbase)
 		return;
 
@@ -685,7 +686,5 @@ static void __init berlin2_clock_setup(struct device_node *np)
 bg2_fail:
 	iounmap(gbase);
 }
-CLK_OF_DECLARE(berlin2_clock, "marvell,berlin2-chip-ctrl",
-	       berlin2_clock_setup);
-CLK_OF_DECLARE(berlin2cd_clock, "marvell,berlin2cd-chip-ctrl",
+CLK_OF_DECLARE(berlin2_clk, "marvell,berlin2-clk",
 	       berlin2_clock_setup);
diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c
index 440ef81ab15c..221f40c2b850 100644
--- a/drivers/clk/berlin/bg2q.c
+++ b/drivers/clk/berlin/bg2q.c
@@ -290,18 +290,19 @@ static const struct berlin2_gate_data bg2q_gates[] __initconst = {
 
 static void __init berlin2q_clock_setup(struct device_node *np)
 {
+	struct device_node *parent_np = of_get_parent(np);
 	const char *parent_names[9];
 	struct clk *clk;
 	int n;
 
-	gbase = of_iomap(np, 0);
+	gbase = of_iomap(parent_np, 0);
 	if (!gbase) {
 		pr_err("%s: Unable to map global base\n", np->full_name);
 		return;
 	}
 
 	/* BG2Q CPU PLL is not part of global registers */
-	cpupll_base = of_iomap(np, 1);
+	cpupll_base = of_iomap(parent_np, 1);
 	if (!cpupll_base) {
 		pr_err("%s: Unable to map cpupll base\n", np->full_name);
 		iounmap(gbase);
@@ -384,5 +385,5 @@ bg2q_fail:
 	iounmap(cpupll_base);
 	iounmap(gbase);
 }
-CLK_OF_DECLARE(berlin2q_clock, "marvell,berlin2q-chip-ctrl",
+CLK_OF_DECLARE(berlin2q_clk, "marvell,berlin2q-clk",
 	       berlin2q_clock_setup);
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 3fdd3912709a..3001f1ae1062 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -12,7 +12,8 @@ obj-$(CONFIG_ISCSI_IBFT_FIND)	+= iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)	+= iscsi_ibft.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
 obj-$(CONFIG_QCOM_SCM)		+= qcom_scm.o
-CFLAGS_qcom_scm.o :=$(call as-instr,.arch_extension sec,-DREQUIRES_SEC=1)
+obj-$(CONFIG_QCOM_SCM)		+= qcom_scm-32.o
+CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch_extension sec,-DREQUIRES_SEC=1)
 
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
 obj-$(CONFIG_EFI)		+= efi/
diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
new file mode 100644
index 000000000000..1bd6f9c34331
--- /dev/null
+++ b/drivers/firmware/qcom_scm-32.c
@@ -0,0 +1,503 @@
+/* Copyright (c) 2010,2015, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2015 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/qcom_scm.h>
+
+#include <asm/outercache.h>
+#include <asm/cacheflush.h>
+
+#include "qcom_scm.h"
+
+#define QCOM_SCM_FLAG_COLDBOOT_CPU0	0x00
+#define QCOM_SCM_FLAG_COLDBOOT_CPU1	0x01
+#define QCOM_SCM_FLAG_COLDBOOT_CPU2	0x08
+#define QCOM_SCM_FLAG_COLDBOOT_CPU3	0x20
+
+#define QCOM_SCM_FLAG_WARMBOOT_CPU0	0x04
+#define QCOM_SCM_FLAG_WARMBOOT_CPU1	0x02
+#define QCOM_SCM_FLAG_WARMBOOT_CPU2	0x10
+#define QCOM_SCM_FLAG_WARMBOOT_CPU3	0x40
+
+struct qcom_scm_entry {
+	int flag;
+	void *entry;
+};
+
+static struct qcom_scm_entry qcom_scm_wb[] = {
+	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU0 },
+	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU1 },
+	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU2 },
+	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU3 },
+};
+
+static DEFINE_MUTEX(qcom_scm_lock);
+
+/**
+ * struct qcom_scm_command - one SCM command buffer
+ * @len: total available memory for command and response
+ * @buf_offset: start of command buffer
+ * @resp_hdr_offset: start of response buffer
+ * @id: command to be executed
+ * @buf: buffer returned from qcom_scm_get_command_buffer()
+ *
+ * An SCM command is laid out in memory as follows:
+ *
+ *	------------------- <--- struct qcom_scm_command
+ *	| command header  |
+ *	------------------- <--- qcom_scm_get_command_buffer()
+ *	| command buffer  |
+ *	------------------- <--- struct qcom_scm_response and
+ *	| response header |      qcom_scm_command_to_response()
+ *	------------------- <--- qcom_scm_get_response_buffer()
+ *	| response buffer |
+ *	-------------------
+ *
+ * There can be arbitrary padding between the headers and buffers so
+ * you should always use the appropriate qcom_scm_get_*_buffer() routines
+ * to access the buffers in a safe manner.
+ */
+struct qcom_scm_command {
+	__le32 len;
+	__le32 buf_offset;
+	__le32 resp_hdr_offset;
+	__le32 id;
+	__le32 buf[0];
+};
+
+/**
+ * struct qcom_scm_response - one SCM response buffer
+ * @len: total available memory for response
+ * @buf_offset: start of response data relative to start of qcom_scm_response
+ * @is_complete: indicates if the command has finished processing
+ */
+struct qcom_scm_response {
+	__le32 len;
+	__le32 buf_offset;
+	__le32 is_complete;
+};
+
+/**
+ * alloc_qcom_scm_command() - Allocate an SCM command
+ * @cmd_size: size of the command buffer
+ * @resp_size: size of the response buffer
+ *
+ * Allocate an SCM command, including enough room for the command
+ * and response headers as well as the command and response buffers.
+ *
+ * Returns a valid &qcom_scm_command on success or %NULL if the allocation fails.
+ */
+static struct qcom_scm_command *alloc_qcom_scm_command(size_t cmd_size, size_t resp_size)
+{
+	struct qcom_scm_command *cmd;
+	size_t len = sizeof(*cmd) + sizeof(struct qcom_scm_response) + cmd_size +
+		resp_size;
+	u32 offset;
+
+	cmd = kzalloc(PAGE_ALIGN(len), GFP_KERNEL);
+	if (cmd) {
+		cmd->len = cpu_to_le32(len);
+		offset = offsetof(struct qcom_scm_command, buf);
+		cmd->buf_offset = cpu_to_le32(offset);
+		cmd->resp_hdr_offset = cpu_to_le32(offset + cmd_size);
+	}
+	return cmd;
+}
+
+/**
+ * free_qcom_scm_command() - Free an SCM command
+ * @cmd: command to free
+ *
+ * Free an SCM command.
+ */
+static inline void free_qcom_scm_command(struct qcom_scm_command *cmd)
+{
+	kfree(cmd);
+}
+
+/**
+ * qcom_scm_command_to_response() - Get a pointer to a qcom_scm_response
+ * @cmd: command
+ *
+ * Returns a pointer to a response for a command.
+ */
+static inline struct qcom_scm_response *qcom_scm_command_to_response(
+		const struct qcom_scm_command *cmd)
+{
+	return (void *)cmd + le32_to_cpu(cmd->resp_hdr_offset);
+}
+
+/**
+ * qcom_scm_get_command_buffer() - Get a pointer to a command buffer
+ * @cmd: command
+ *
+ * Returns a pointer to the command buffer of a command.
+ */
+static inline void *qcom_scm_get_command_buffer(const struct qcom_scm_command *cmd)
+{
+	return (void *)cmd->buf;
+}
+
+/**
+ * qcom_scm_get_response_buffer() - Get a pointer to a response buffer
+ * @rsp: response
+ *
+ * Returns a pointer to a response buffer of a response.
+ */
+static inline void *qcom_scm_get_response_buffer(const struct qcom_scm_response *rsp)
+{
+	return (void *)rsp + le32_to_cpu(rsp->buf_offset);
+}
+
+static int qcom_scm_remap_error(int err)
+{
+	pr_err("qcom_scm_call failed with error code %d\n", err);
+	switch (err) {
+	case QCOM_SCM_ERROR:
+		return -EIO;
+	case QCOM_SCM_EINVAL_ADDR:
+	case QCOM_SCM_EINVAL_ARG:
+		return -EINVAL;
+	case QCOM_SCM_EOPNOTSUPP:
+		return -EOPNOTSUPP;
+	case QCOM_SCM_ENOMEM:
+		return -ENOMEM;
+	}
+	return -EINVAL;
+}
+
+static u32 smc(u32 cmd_addr)
+{
+	int context_id;
+	register u32 r0 asm("r0") = 1;
+	register u32 r1 asm("r1") = (u32)&context_id;
+	register u32 r2 asm("r2") = cmd_addr;
+	do {
+		asm volatile(
+			__asmeq("%0", "r0")
+			__asmeq("%1", "r0")
+			__asmeq("%2", "r1")
+			__asmeq("%3", "r2")
+#ifdef REQUIRES_SEC
+			".arch_extension sec\n"
+#endif
+			"smc	#0	@ switch to secure world\n"
+			: "=r" (r0)
+			: "r" (r0), "r" (r1), "r" (r2)
+			: "r3");
+	} while (r0 == QCOM_SCM_INTERRUPTED);
+
+	return r0;
+}
+
+static int __qcom_scm_call(const struct qcom_scm_command *cmd)
+{
+	int ret;
+	u32 cmd_addr = virt_to_phys(cmd);
+
+	/*
+	 * Flush the command buffer so that the secure world sees
+	 * the correct data.
+	 */
+	__cpuc_flush_dcache_area((void *)cmd, cmd->len);
+	outer_flush_range(cmd_addr, cmd_addr + cmd->len);
+
+	ret = smc(cmd_addr);
+	if (ret < 0)
+		ret = qcom_scm_remap_error(ret);
+
+	return ret;
+}
+
+static void qcom_scm_inv_range(unsigned long start, unsigned long end)
+{
+	u32 cacheline_size, ctr;
+
+	asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
+	cacheline_size = 4 << ((ctr >> 16) & 0xf);
+
+	start = round_down(start, cacheline_size);
+	end = round_up(end, cacheline_size);
+	outer_inv_range(start, end);
+	while (start < end) {
+		asm ("mcr p15, 0, %0, c7, c6, 1" : : "r" (start)
+		     : "memory");
+		start += cacheline_size;
+	}
+	dsb();
+	isb();
+}
+
+/**
+ * qcom_scm_call() - Send an SCM command
+ * @svc_id: service identifier
+ * @cmd_id: command identifier
+ * @cmd_buf: command buffer
+ * @cmd_len: length of the command buffer
+ * @resp_buf: response buffer
+ * @resp_len: length of the response buffer
+ *
+ * Sends a command to the SCM and waits for the command to finish processing.
+ *
+ * A note on cache maintenance:
+ * Note that any buffers that are expected to be accessed by the secure world
+ * must be flushed before invoking qcom_scm_call and invalidated in the cache
+ * immediately after qcom_scm_call returns. Cache maintenance on the command
+ * and response buffers is taken care of by qcom_scm_call; however, callers are
+ * responsible for any other cached buffers passed over to the secure world.
+ */
+static int qcom_scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf,
+			size_t cmd_len, void *resp_buf, size_t resp_len)
+{
+	int ret;
+	struct qcom_scm_command *cmd;
+	struct qcom_scm_response *rsp;
+	unsigned long start, end;
+
+	cmd = alloc_qcom_scm_command(cmd_len, resp_len);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->id = cpu_to_le32((svc_id << 10) | cmd_id);
+	if (cmd_buf)
+		memcpy(qcom_scm_get_command_buffer(cmd), cmd_buf, cmd_len);
+
+	mutex_lock(&qcom_scm_lock);
+	ret = __qcom_scm_call(cmd);
+	mutex_unlock(&qcom_scm_lock);
+	if (ret)
+		goto out;
+
+	rsp = qcom_scm_command_to_response(cmd);
+	start = (unsigned long)rsp;
+
+	do {
+		qcom_scm_inv_range(start, start + sizeof(*rsp));
+	} while (!rsp->is_complete);
+
+	end = (unsigned long)qcom_scm_get_response_buffer(rsp) + resp_len;
+	qcom_scm_inv_range(start, end);
+
+	if (resp_buf)
+		memcpy(resp_buf, qcom_scm_get_response_buffer(rsp), resp_len);
+out:
+	free_qcom_scm_command(cmd);
+	return ret;
+}
+
+#define SCM_CLASS_REGISTER	(0x2 << 8)
+#define SCM_MASK_IRQS		BIT(5)
+#define SCM_ATOMIC(svc, cmd, n) (((((svc) << 10)|((cmd) & 0x3ff)) << 12) | \
+				SCM_CLASS_REGISTER | \
+				SCM_MASK_IRQS | \
+				(n & 0xf))
+
+/**
+ * qcom_scm_call_atomic1() - Send an atomic SCM command with one argument
+ * @svc_id: service identifier
+ * @cmd_id: command identifier
+ * @arg1: first argument
+ *
+ * This shall only be used with commands that are guaranteed to be
+ * uninterruptable, atomic and SMP safe.
+ */
+static s32 qcom_scm_call_atomic1(u32 svc, u32 cmd, u32 arg1)
+{
+	int context_id;
+
+	register u32 r0 asm("r0") = SCM_ATOMIC(svc, cmd, 1);
+	register u32 r1 asm("r1") = (u32)&context_id;
+	register u32 r2 asm("r2") = arg1;
+
+	asm volatile(
+			__asmeq("%0", "r0")
+			__asmeq("%1", "r0")
+			__asmeq("%2", "r1")
+			__asmeq("%3", "r2")
+#ifdef REQUIRES_SEC
+			".arch_extension sec\n"
+#endif
+			"smc    #0      @ switch to secure world\n"
+			: "=r" (r0)
+			: "r" (r0), "r" (r1), "r" (r2)
+			: "r3");
+	return r0;
+}
+
+u32 qcom_scm_get_version(void)
+{
+	int context_id;
+	static u32 version = -1;
+	register u32 r0 asm("r0");
+	register u32 r1 asm("r1");
+
+	if (version != -1)
+		return version;
+
+	mutex_lock(&qcom_scm_lock);
+
+	r0 = 0x1 << 8;
+	r1 = (u32)&context_id;
+	do {
+		asm volatile(
+			__asmeq("%0", "r0")
+			__asmeq("%1", "r1")
+			__asmeq("%2", "r0")
+			__asmeq("%3", "r1")
+#ifdef REQUIRES_SEC
+			".arch_extension sec\n"
+#endif
+			"smc	#0	@ switch to secure world\n"
+			: "=r" (r0), "=r" (r1)
+			: "r" (r0), "r" (r1)
+			: "r2", "r3");
+	} while (r0 == QCOM_SCM_INTERRUPTED);
+
+	version = r1;
+	mutex_unlock(&qcom_scm_lock);
+
+	return version;
+}
+EXPORT_SYMBOL(qcom_scm_get_version);
+
+/*
+ * Set the cold/warm boot address for one of the CPU cores.
+ */
+static int qcom_scm_set_boot_addr(u32 addr, int flags)
+{
+	struct {
+		__le32 flags;
+		__le32 addr;
+	} cmd;
+
+	cmd.addr = cpu_to_le32(addr);
+	cmd.flags = cpu_to_le32(flags);
+	return qcom_scm_call(QCOM_SCM_SVC_BOOT, QCOM_SCM_BOOT_ADDR,
+			&cmd, sizeof(cmd), NULL, 0);
+}
+
+/**
+ * qcom_scm_set_cold_boot_addr() - Set the cold boot address for cpus
+ * @entry: Entry point function for the cpus
+ * @cpus: The cpumask of cpus that will use the entry point
+ *
+ * Set the cold boot address of the cpus. Any cpu outside the supported
+ * range would be removed from the cpu present mask.
+ */
+int __qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus)
+{
+	int flags = 0;
+	int cpu;
+	int scm_cb_flags[] = {
+		QCOM_SCM_FLAG_COLDBOOT_CPU0,
+		QCOM_SCM_FLAG_COLDBOOT_CPU1,
+		QCOM_SCM_FLAG_COLDBOOT_CPU2,
+		QCOM_SCM_FLAG_COLDBOOT_CPU3,
+	};
+
+	if (!cpus || (cpus && cpumask_empty(cpus)))
+		return -EINVAL;
+
+	for_each_cpu(cpu, cpus) {
+		if (cpu < ARRAY_SIZE(scm_cb_flags))
+			flags |= scm_cb_flags[cpu];
+		else
+			set_cpu_present(cpu, false);
+	}
+
+	return qcom_scm_set_boot_addr(virt_to_phys(entry), flags);
+}
+
+/**
+ * qcom_scm_set_warm_boot_addr() - Set the warm boot address for cpus
+ * @entry: Entry point function for the cpus
+ * @cpus: The cpumask of cpus that will use the entry point
+ *
+ * Set the Linux entry point for the SCM to transfer control to when coming
+ * out of a power down. CPU power down may be executed on cpuidle or hotplug.
+ */
+int __qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus)
+{
+	int ret;
+	int flags = 0;
+	int cpu;
+
+	/*
+	 * Reassign only if we are switching from hotplug entry point
+	 * to cpuidle entry point or vice versa.
+	 */
+	for_each_cpu(cpu, cpus) {
+		if (entry == qcom_scm_wb[cpu].entry)
+			continue;
+		flags |= qcom_scm_wb[cpu].flag;
+	}
+
+	/* No change in entry function */
+	if (!flags)
+		return 0;
+
+	ret = qcom_scm_set_boot_addr(virt_to_phys(entry), flags);
+	if (!ret) {
+		for_each_cpu(cpu, cpus)
+			qcom_scm_wb[cpu].entry = entry;
+	}
+
+	return ret;
+}
+
+/**
+ * qcom_scm_cpu_power_down() - Power down the cpu
+ * @flags - Flags to flush cache
+ *
+ * This is an end point to power down cpu. If there was a pending interrupt,
+ * the control would return from this function, otherwise, the cpu jumps to the
+ * warm boot entry point set for this cpu upon reset.
+ */
+void __qcom_scm_cpu_power_down(u32 flags)
+{
+	qcom_scm_call_atomic1(QCOM_SCM_SVC_BOOT, QCOM_SCM_CMD_TERMINATE_PC,
+			flags & QCOM_SCM_FLUSH_FLAG_MASK);
+}
+
+int __qcom_scm_is_call_available(u32 svc_id, u32 cmd_id)
+{
+	int ret;
+	u32 svc_cmd = (svc_id << 10) | cmd_id;
+	u32 ret_val = 0;
+
+	ret = qcom_scm_call(QCOM_SCM_SVC_INFO, QCOM_IS_CALL_AVAIL_CMD, &svc_cmd,
+			sizeof(svc_cmd), &ret_val, sizeof(ret_val));
+	if (ret)
+		return ret;
+
+	return ret_val;
+}
+
+int __qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp)
+{
+	if (req_cnt > QCOM_SCM_HDCP_MAX_REQ_CNT)
+		return -ERANGE;
+
+	return qcom_scm_call(QCOM_SCM_SVC_HDCP, QCOM_SCM_CMD_HDCP,
+		req, req_cnt * sizeof(*req), resp, sizeof(*resp));
+}
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 994b50fd997c..45c008d68891 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010,2015, The Linux Foundation. All rights reserved.
  * Copyright (C) 2015 Linaro Ltd.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -16,393 +16,12 @@
  * 02110-1301, USA.
  */
 
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/errno.h>
-#include <linux/err.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/types.h>
 #include <linux/qcom_scm.h>
 
-#include <asm/outercache.h>
-#include <asm/cacheflush.h>
-
-
-#define QCOM_SCM_ENOMEM		-5
-#define QCOM_SCM_EOPNOTSUPP	-4
-#define QCOM_SCM_EINVAL_ADDR	-3
-#define QCOM_SCM_EINVAL_ARG	-2
-#define QCOM_SCM_ERROR		-1
-#define QCOM_SCM_INTERRUPTED	1
-
-#define QCOM_SCM_FLAG_COLDBOOT_CPU0	0x00
-#define QCOM_SCM_FLAG_COLDBOOT_CPU1	0x01
-#define QCOM_SCM_FLAG_COLDBOOT_CPU2	0x08
-#define QCOM_SCM_FLAG_COLDBOOT_CPU3	0x20
-
-#define QCOM_SCM_FLAG_WARMBOOT_CPU0	0x04
-#define QCOM_SCM_FLAG_WARMBOOT_CPU1	0x02
-#define QCOM_SCM_FLAG_WARMBOOT_CPU2	0x10
-#define QCOM_SCM_FLAG_WARMBOOT_CPU3	0x40
-
-struct qcom_scm_entry {
-	int flag;
-	void *entry;
-};
-
-static struct qcom_scm_entry qcom_scm_wb[] = {
-	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU0 },
-	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU1 },
-	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU2 },
-	{ .flag = QCOM_SCM_FLAG_WARMBOOT_CPU3 },
-};
-
-static DEFINE_MUTEX(qcom_scm_lock);
-
-/**
- * struct qcom_scm_command - one SCM command buffer
- * @len: total available memory for command and response
- * @buf_offset: start of command buffer
- * @resp_hdr_offset: start of response buffer
- * @id: command to be executed
- * @buf: buffer returned from qcom_scm_get_command_buffer()
- *
- * An SCM command is laid out in memory as follows:
- *
- *	------------------- <--- struct qcom_scm_command
- *	| command header  |
- *	------------------- <--- qcom_scm_get_command_buffer()
- *	| command buffer  |
- *	------------------- <--- struct qcom_scm_response and
- *	| response header |      qcom_scm_command_to_response()
- *	------------------- <--- qcom_scm_get_response_buffer()
- *	| response buffer |
- *	-------------------
- *
- * There can be arbitrary padding between the headers and buffers so
- * you should always use the appropriate qcom_scm_get_*_buffer() routines
- * to access the buffers in a safe manner.
- */
-struct qcom_scm_command {
-	__le32 len;
-	__le32 buf_offset;
-	__le32 resp_hdr_offset;
-	__le32 id;
-	__le32 buf[0];
-};
-
-/**
- * struct qcom_scm_response - one SCM response buffer
- * @len: total available memory for response
- * @buf_offset: start of response data relative to start of qcom_scm_response
- * @is_complete: indicates if the command has finished processing
- */
-struct qcom_scm_response {
-	__le32 len;
-	__le32 buf_offset;
-	__le32 is_complete;
-};
-
-/**
- * alloc_qcom_scm_command() - Allocate an SCM command
- * @cmd_size: size of the command buffer
- * @resp_size: size of the response buffer
- *
- * Allocate an SCM command, including enough room for the command
- * and response headers as well as the command and response buffers.
- *
- * Returns a valid &qcom_scm_command on success or %NULL if the allocation fails.
- */
-static struct qcom_scm_command *alloc_qcom_scm_command(size_t cmd_size, size_t resp_size)
-{
-	struct qcom_scm_command *cmd;
-	size_t len = sizeof(*cmd) + sizeof(struct qcom_scm_response) + cmd_size +
-		resp_size;
-	u32 offset;
-
-	cmd = kzalloc(PAGE_ALIGN(len), GFP_KERNEL);
-	if (cmd) {
-		cmd->len = cpu_to_le32(len);
-		offset = offsetof(struct qcom_scm_command, buf);
-		cmd->buf_offset = cpu_to_le32(offset);
-		cmd->resp_hdr_offset = cpu_to_le32(offset + cmd_size);
-	}
-	return cmd;
-}
-
-/**
- * free_qcom_scm_command() - Free an SCM command
- * @cmd: command to free
- *
- * Free an SCM command.
- */
-static inline void free_qcom_scm_command(struct qcom_scm_command *cmd)
-{
-	kfree(cmd);
-}
-
-/**
- * qcom_scm_command_to_response() - Get a pointer to a qcom_scm_response
- * @cmd: command
- *
- * Returns a pointer to a response for a command.
- */
-static inline struct qcom_scm_response *qcom_scm_command_to_response(
-		const struct qcom_scm_command *cmd)
-{
-	return (void *)cmd + le32_to_cpu(cmd->resp_hdr_offset);
-}
-
-/**
- * qcom_scm_get_command_buffer() - Get a pointer to a command buffer
- * @cmd: command
- *
- * Returns a pointer to the command buffer of a command.
- */
-static inline void *qcom_scm_get_command_buffer(const struct qcom_scm_command *cmd)
-{
-	return (void *)cmd->buf;
-}
-
-/**
- * qcom_scm_get_response_buffer() - Get a pointer to a response buffer
- * @rsp: response
- *
- * Returns a pointer to a response buffer of a response.
- */
-static inline void *qcom_scm_get_response_buffer(const struct qcom_scm_response *rsp)
-{
-	return (void *)rsp + le32_to_cpu(rsp->buf_offset);
-}
-
-static int qcom_scm_remap_error(int err)
-{
-	pr_err("qcom_scm_call failed with error code %d\n", err);
-	switch (err) {
-	case QCOM_SCM_ERROR:
-		return -EIO;
-	case QCOM_SCM_EINVAL_ADDR:
-	case QCOM_SCM_EINVAL_ARG:
-		return -EINVAL;
-	case QCOM_SCM_EOPNOTSUPP:
-		return -EOPNOTSUPP;
-	case QCOM_SCM_ENOMEM:
-		return -ENOMEM;
-	}
-	return -EINVAL;
-}
-
-static u32 smc(u32 cmd_addr)
-{
-	int context_id;
-	register u32 r0 asm("r0") = 1;
-	register u32 r1 asm("r1") = (u32)&context_id;
-	register u32 r2 asm("r2") = cmd_addr;
-	do {
-		asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r0")
-			__asmeq("%2", "r1")
-			__asmeq("%3", "r2")
-#ifdef REQUIRES_SEC
-			".arch_extension sec\n"
-#endif
-			"smc	#0	@ switch to secure world\n"
-			: "=r" (r0)
-			: "r" (r0), "r" (r1), "r" (r2)
-			: "r3");
-	} while (r0 == QCOM_SCM_INTERRUPTED);
-
-	return r0;
-}
-
-static int __qcom_scm_call(const struct qcom_scm_command *cmd)
-{
-	int ret;
-	u32 cmd_addr = virt_to_phys(cmd);
-
-	/*
-	 * Flush the command buffer so that the secure world sees
-	 * the correct data.
-	 */
-	__cpuc_flush_dcache_area((void *)cmd, cmd->len);
-	outer_flush_range(cmd_addr, cmd_addr + cmd->len);
-
-	ret = smc(cmd_addr);
-	if (ret < 0)
-		ret = qcom_scm_remap_error(ret);
-
-	return ret;
-}
-
-static void qcom_scm_inv_range(unsigned long start, unsigned long end)
-{
-	u32 cacheline_size, ctr;
-
-	asm volatile("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
-	cacheline_size = 4 << ((ctr >> 16) & 0xf);
-
-	start = round_down(start, cacheline_size);
-	end = round_up(end, cacheline_size);
-	outer_inv_range(start, end);
-	while (start < end) {
-		asm ("mcr p15, 0, %0, c7, c6, 1" : : "r" (start)
-		     : "memory");
-		start += cacheline_size;
-	}
-	dsb();
-	isb();
-}
-
-/**
- * qcom_scm_call() - Send an SCM command
- * @svc_id: service identifier
- * @cmd_id: command identifier
- * @cmd_buf: command buffer
- * @cmd_len: length of the command buffer
- * @resp_buf: response buffer
- * @resp_len: length of the response buffer
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- *
- * A note on cache maintenance:
- * Note that any buffers that are expected to be accessed by the secure world
- * must be flushed before invoking qcom_scm_call and invalidated in the cache
- * immediately after qcom_scm_call returns. Cache maintenance on the command
- * and response buffers is taken care of by qcom_scm_call; however, callers are
- * responsible for any other cached buffers passed over to the secure world.
- */
-static int qcom_scm_call(u32 svc_id, u32 cmd_id, const void *cmd_buf,
-			size_t cmd_len, void *resp_buf, size_t resp_len)
-{
-	int ret;
-	struct qcom_scm_command *cmd;
-	struct qcom_scm_response *rsp;
-	unsigned long start, end;
-
-	cmd = alloc_qcom_scm_command(cmd_len, resp_len);
-	if (!cmd)
-		return -ENOMEM;
-
-	cmd->id = cpu_to_le32((svc_id << 10) | cmd_id);
-	if (cmd_buf)
-		memcpy(qcom_scm_get_command_buffer(cmd), cmd_buf, cmd_len);
-
-	mutex_lock(&qcom_scm_lock);
-	ret = __qcom_scm_call(cmd);
-	mutex_unlock(&qcom_scm_lock);
-	if (ret)
-		goto out;
-
-	rsp = qcom_scm_command_to_response(cmd);
-	start = (unsigned long)rsp;
-
-	do {
-		qcom_scm_inv_range(start, start + sizeof(*rsp));
-	} while (!rsp->is_complete);
-
-	end = (unsigned long)qcom_scm_get_response_buffer(rsp) + resp_len;
-	qcom_scm_inv_range(start, end);
-
-	if (resp_buf)
-		memcpy(resp_buf, qcom_scm_get_response_buffer(rsp), resp_len);
-out:
-	free_qcom_scm_command(cmd);
-	return ret;
-}
-
-#define SCM_CLASS_REGISTER	(0x2 << 8)
-#define SCM_MASK_IRQS		BIT(5)
-#define SCM_ATOMIC(svc, cmd, n) (((((svc) << 10)|((cmd) & 0x3ff)) << 12) | \
-				SCM_CLASS_REGISTER | \
-				SCM_MASK_IRQS | \
-				(n & 0xf))
-
-/**
- * qcom_scm_call_atomic1() - Send an atomic SCM command with one argument
- * @svc_id: service identifier
- * @cmd_id: command identifier
- * @arg1: first argument
- *
- * This shall only be used with commands that are guaranteed to be
- * uninterruptable, atomic and SMP safe.
- */
-static s32 qcom_scm_call_atomic1(u32 svc, u32 cmd, u32 arg1)
-{
-	int context_id;
-
-	register u32 r0 asm("r0") = SCM_ATOMIC(svc, cmd, 1);
-	register u32 r1 asm("r1") = (u32)&context_id;
-	register u32 r2 asm("r2") = arg1;
-
-	asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r0")
-			__asmeq("%2", "r1")
-			__asmeq("%3", "r2")
-#ifdef REQUIRES_SEC
-			".arch_extension sec\n"
-#endif
-			"smc    #0      @ switch to secure world\n"
-			: "=r" (r0)
-			: "r" (r0), "r" (r1), "r" (r2)
-			: "r3");
-	return r0;
-}
-
-u32 qcom_scm_get_version(void)
-{
-	int context_id;
-	static u32 version = -1;
-	register u32 r0 asm("r0");
-	register u32 r1 asm("r1");
-
-	if (version != -1)
-		return version;
-
-	mutex_lock(&qcom_scm_lock);
-
-	r0 = 0x1 << 8;
-	r1 = (u32)&context_id;
-	do {
-		asm volatile(
-			__asmeq("%0", "r0")
-			__asmeq("%1", "r1")
-			__asmeq("%2", "r0")
-			__asmeq("%3", "r1")
-#ifdef REQUIRES_SEC
-			".arch_extension sec\n"
-#endif
-			"smc	#0	@ switch to secure world\n"
-			: "=r" (r0), "=r" (r1)
-			: "r" (r0), "r" (r1)
-			: "r2", "r3");
-	} while (r0 == QCOM_SCM_INTERRUPTED);
-
-	version = r1;
-	mutex_unlock(&qcom_scm_lock);
-
-	return version;
-}
-EXPORT_SYMBOL(qcom_scm_get_version);
-
-#define QCOM_SCM_SVC_BOOT			0x1
-#define QCOM_SCM_BOOT_ADDR			0x1
-/*
- * Set the cold/warm boot address for one of the CPU cores.
- */
-static int qcom_scm_set_boot_addr(u32 addr, int flags)
-{
-	struct {
-		__le32 flags;
-		__le32 addr;
-	} cmd;
-
-	cmd.addr = cpu_to_le32(addr);
-	cmd.flags = cpu_to_le32(flags);
-	return qcom_scm_call(QCOM_SCM_SVC_BOOT, QCOM_SCM_BOOT_ADDR,
-			&cmd, sizeof(cmd), NULL, 0);
-}
+#include "qcom_scm.h"
 
 /**
  * qcom_scm_set_cold_boot_addr() - Set the cold boot address for cpus
@@ -414,26 +33,7 @@ static int qcom_scm_set_boot_addr(u32 addr, int flags)
  */
 int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus)
 {
-	int flags = 0;
-	int cpu;
-	int scm_cb_flags[] = {
-		QCOM_SCM_FLAG_COLDBOOT_CPU0,
-		QCOM_SCM_FLAG_COLDBOOT_CPU1,
-		QCOM_SCM_FLAG_COLDBOOT_CPU2,
-		QCOM_SCM_FLAG_COLDBOOT_CPU3,
-	};
-
-	if (!cpus || (cpus && cpumask_empty(cpus)))
-		return -EINVAL;
-
-	for_each_cpu(cpu, cpus) {
-		if (cpu < ARRAY_SIZE(scm_cb_flags))
-			flags |= scm_cb_flags[cpu];
-		else
-			set_cpu_present(cpu, false);
-	}
-
-	return qcom_scm_set_boot_addr(virt_to_phys(entry), flags);
+	return __qcom_scm_set_cold_boot_addr(entry, cpus);
 }
 EXPORT_SYMBOL(qcom_scm_set_cold_boot_addr);
 
@@ -447,37 +47,10 @@ EXPORT_SYMBOL(qcom_scm_set_cold_boot_addr);
  */
 int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus)
 {
-	int ret;
-	int flags = 0;
-	int cpu;
-
-	/*
-	 * Reassign only if we are switching from hotplug entry point
-	 * to cpuidle entry point or vice versa.
-	 */
-	for_each_cpu(cpu, cpus) {
-		if (entry == qcom_scm_wb[cpu].entry)
-			continue;
-		flags |= qcom_scm_wb[cpu].flag;
-	}
-
-	/* No change in entry function */
-	if (!flags)
-		return 0;
-
-	ret = qcom_scm_set_boot_addr(virt_to_phys(entry), flags);
-	if (!ret) {
-		for_each_cpu(cpu, cpus)
-			qcom_scm_wb[cpu].entry = entry;
-	}
-
-	return ret;
+	return __qcom_scm_set_warm_boot_addr(entry, cpus);
 }
 EXPORT_SYMBOL(qcom_scm_set_warm_boot_addr);
 
-#define QCOM_SCM_CMD_TERMINATE_PC	0x2
-#define QCOM_SCM_FLUSH_FLAG_MASK	0x3
-
 /**
  * qcom_scm_cpu_power_down() - Power down the cpu
  * @flags - Flags to flush cache
@@ -488,7 +61,36 @@ EXPORT_SYMBOL(qcom_scm_set_warm_boot_addr);
  */
 void qcom_scm_cpu_power_down(u32 flags)
 {
-	qcom_scm_call_atomic1(QCOM_SCM_SVC_BOOT, QCOM_SCM_CMD_TERMINATE_PC,
-			flags & QCOM_SCM_FLUSH_FLAG_MASK);
+	__qcom_scm_cpu_power_down(flags);
 }
 EXPORT_SYMBOL(qcom_scm_cpu_power_down);
+
+/**
+ * qcom_scm_hdcp_available() - Check if secure environment supports HDCP.
+ *
+ * Return true if HDCP is supported, false if not.
+ */
+bool qcom_scm_hdcp_available(void)
+{
+	int ret;
+
+	ret = __qcom_scm_is_call_available(QCOM_SCM_SVC_HDCP,
+		QCOM_SCM_CMD_HDCP);
+
+	return (ret > 0) ? true : false;
+}
+EXPORT_SYMBOL(qcom_scm_hdcp_available);
+
+/**
+ * qcom_scm_hdcp_req() - Send HDCP request.
+ * @req: HDCP request array
+ * @req_cnt: HDCP request array count
+ * @resp: response buffer passed to SCM
+ *
+ * Write HDCP register(s) through SCM.
+ */
+int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp)
+{
+	return __qcom_scm_hdcp_req(req, req_cnt, resp);
+}
+EXPORT_SYMBOL(qcom_scm_hdcp_req);
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
new file mode 100644
index 000000000000..2cce75c08b99
--- /dev/null
+++ b/drivers/firmware/qcom_scm.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __QCOM_SCM_INT_H
+#define __QCOM_SCM_INT_H
+
+#define QCOM_SCM_SVC_BOOT		0x1
+#define QCOM_SCM_BOOT_ADDR		0x1
+#define QCOM_SCM_BOOT_ADDR_MC		0x11
+
+#define QCOM_SCM_FLAG_HLOS		0x01
+#define QCOM_SCM_FLAG_COLDBOOT_MC	0x02
+#define QCOM_SCM_FLAG_WARMBOOT_MC	0x04
+extern int __qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus);
+extern int __qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
+
+#define QCOM_SCM_CMD_TERMINATE_PC	0x2
+#define QCOM_SCM_FLUSH_FLAG_MASK	0x3
+#define QCOM_SCM_CMD_CORE_HOTPLUGGED	0x10
+extern void __qcom_scm_cpu_power_down(u32 flags);
+
+#define QCOM_SCM_SVC_INFO		0x6
+#define QCOM_IS_CALL_AVAIL_CMD		0x1
+extern int __qcom_scm_is_call_available(u32 svc_id, u32 cmd_id);
+
+#define QCOM_SCM_SVC_HDCP		0x11
+#define QCOM_SCM_CMD_HDCP		0x01
+extern int __qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
+		u32 *resp);
+
+/* common error codes */
+#define QCOM_SCM_ENOMEM		-5
+#define QCOM_SCM_EOPNOTSUPP	-4
+#define QCOM_SCM_EINVAL_ADDR	-3
+#define QCOM_SCM_EINVAL_ARG	-2
+#define QCOM_SCM_ERROR		-1
+#define QCOM_SCM_INTERRUPTED	1
+
+#endif
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1ae4e547b419..73f918d066c6 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -219,7 +219,7 @@ config TEGRA_IOMMU_SMMU
 	select IOMMU_API
 	help
 	  This driver supports the IOMMU hardware (SMMU) found on NVIDIA Tegra
-	  SoCs (Tegra30 up to Tegra124).
+	  SoCs (Tegra30 up to Tegra132).
 
 config EXYNOS_IOMMU
 	bool "Exynos IOMMU Support"
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index c845d99ecf6b..c1f2e521dc52 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/err.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
@@ -31,6 +32,8 @@ struct tegra_smmu {
 	struct mutex lock;
 
 	struct list_head list;
+
+	struct dentry *debugfs;
 };
 
 struct tegra_smmu_as {
@@ -673,6 +676,103 @@ static void tegra_smmu_ahb_enable(void)
 	}
 }
 
+static int tegra_smmu_swgroups_show(struct seq_file *s, void *data)
+{
+	struct tegra_smmu *smmu = s->private;
+	unsigned int i;
+	u32 value;
+
+	seq_printf(s, "swgroup    enabled  ASID\n");
+	seq_printf(s, "------------------------\n");
+
+	for (i = 0; i < smmu->soc->num_swgroups; i++) {
+		const struct tegra_smmu_swgroup *group = &smmu->soc->swgroups[i];
+		const char *status;
+		unsigned int asid;
+
+		value = smmu_readl(smmu, group->reg);
+
+		if (value & SMMU_ASID_ENABLE)
+			status = "yes";
+		else
+			status = "no";
+
+		asid = value & SMMU_ASID_MASK;
+
+		seq_printf(s, "%-9s  %-7s  %#04x\n", group->name, status,
+			   asid);
+	}
+
+	return 0;
+}
+
+static int tegra_smmu_swgroups_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, tegra_smmu_swgroups_show, inode->i_private);
+}
+
+static const struct file_operations tegra_smmu_swgroups_fops = {
+	.open = tegra_smmu_swgroups_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int tegra_smmu_clients_show(struct seq_file *s, void *data)
+{
+	struct tegra_smmu *smmu = s->private;
+	unsigned int i;
+	u32 value;
+
+	seq_printf(s, "client       enabled\n");
+	seq_printf(s, "--------------------\n");
+
+	for (i = 0; i < smmu->soc->num_clients; i++) {
+		const struct tegra_mc_client *client = &smmu->soc->clients[i];
+		const char *status;
+
+		value = smmu_readl(smmu, client->smmu.reg);
+
+		if (value & BIT(client->smmu.bit))
+			status = "yes";
+		else
+			status = "no";
+
+		seq_printf(s, "%-12s %s\n", client->name, status);
+	}
+
+	return 0;
+}
+
+static int tegra_smmu_clients_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, tegra_smmu_clients_show, inode->i_private);
+}
+
+static const struct file_operations tegra_smmu_clients_fops = {
+	.open = tegra_smmu_clients_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
+{
+	smmu->debugfs = debugfs_create_dir("smmu", NULL);
+	if (!smmu->debugfs)
+		return;
+
+	debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu,
+			    &tegra_smmu_swgroups_fops);
+	debugfs_create_file("clients", S_IRUGO, smmu->debugfs, smmu,
+			    &tegra_smmu_clients_fops);
+}
+
+static void tegra_smmu_debugfs_exit(struct tegra_smmu *smmu)
+{
+	debugfs_remove_recursive(smmu->debugfs);
+}
+
 struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 				    const struct tegra_smmu_soc *soc,
 				    struct tegra_mc *mc)
@@ -743,5 +843,14 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 	if (err < 0)
 		return ERR_PTR(err);
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		tegra_smmu_debugfs_init(smmu);
+
 	return smmu;
 }
+
+void tegra_smmu_remove(struct tegra_smmu *smmu)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		tegra_smmu_debugfs_exit(smmu);
+}
diff --git a/drivers/leds/leds-syscon.c b/drivers/leds/leds-syscon.c
index 6896e2d9ba58..d1660b039812 100644
--- a/drivers/leds/leds-syscon.c
+++ b/drivers/leds/leds-syscon.c
@@ -20,6 +20,7 @@
  * MA 02111-1307 USA
  */
 #include <linux/io.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
@@ -66,102 +67,101 @@ static void syscon_led_set(struct led_classdev *led_cdev,
 		dev_err(sled->cdev.dev, "error updating LED status\n");
 }
 
-static int __init syscon_leds_spawn(struct device_node *np,
-				    struct device *dev,
-				    struct regmap *map)
+static int syscon_led_probe(struct platform_device *pdev)
 {
-	struct device_node *child;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device *parent;
+	struct regmap *map;
+	struct syscon_led *sled;
+	const char *state;
 	int ret;
 
-	for_each_available_child_of_node(np, child) {
-		struct syscon_led *sled;
-		const char *state;
-
-		/* Only check for register-bit-leds */
-		if (of_property_match_string(child, "compatible",
-					     "register-bit-led") < 0)
-			continue;
-
-		sled = devm_kzalloc(dev, sizeof(*sled), GFP_KERNEL);
-		if (!sled)
-			return -ENOMEM;
-
-		sled->map = map;
-
-		if (of_property_read_u32(child, "offset", &sled->offset))
-			return -EINVAL;
-		if (of_property_read_u32(child, "mask", &sled->mask))
-			return -EINVAL;
-		sled->cdev.name =
-			of_get_property(child, "label", NULL) ? : child->name;
-		sled->cdev.default_trigger =
-			of_get_property(child, "linux,default-trigger", NULL);
-
-		state = of_get_property(child, "default-state", NULL);
-		if (state) {
-			if (!strcmp(state, "keep")) {
-				u32 val;
-
-				ret = regmap_read(map, sled->offset, &val);
-				if (ret < 0)
-					return ret;
-				sled->state = !!(val & sled->mask);
-			} else if (!strcmp(state, "on")) {
-				sled->state = true;
-				ret = regmap_update_bits(map, sled->offset,
-							 sled->mask,
-							 sled->mask);
-				if (ret < 0)
-					return ret;
-			} else {
-				sled->state = false;
-				ret = regmap_update_bits(map, sled->offset,
-							 sled->mask, 0);
-				if (ret < 0)
-					return ret;
-			}
+	parent = dev->parent;
+	if (!parent) {
+		dev_err(dev, "no parent for syscon LED\n");
+		return -ENODEV;
+	}
+	map = syscon_node_to_regmap(parent->of_node);
+	if (!map) {
+		dev_err(dev, "no regmap for syscon LED parent\n");
+		return -ENODEV;
+	}
+
+	sled = devm_kzalloc(dev, sizeof(*sled), GFP_KERNEL);
+	if (!sled)
+		return -ENOMEM;
+
+	sled->map = map;
+
+	if (of_property_read_u32(np, "offset", &sled->offset))
+		return -EINVAL;
+	if (of_property_read_u32(np, "mask", &sled->mask))
+		return -EINVAL;
+	sled->cdev.name =
+		of_get_property(np, "label", NULL) ? : np->name;
+	sled->cdev.default_trigger =
+		of_get_property(np, "linux,default-trigger", NULL);
+
+	state = of_get_property(np, "default-state", NULL);
+	if (state) {
+		if (!strcmp(state, "keep")) {
+			u32 val;
+
+			ret = regmap_read(map, sled->offset, &val);
+			if (ret < 0)
+				return ret;
+			sled->state = !!(val & sled->mask);
+		} else if (!strcmp(state, "on")) {
+			sled->state = true;
+			ret = regmap_update_bits(map, sled->offset,
+						 sled->mask,
+						 sled->mask);
+			if (ret < 0)
+				return ret;
+		} else {
+			sled->state = false;
+			ret = regmap_update_bits(map, sled->offset,
+						 sled->mask, 0);
+			if (ret < 0)
+				return ret;
 		}
-		sled->cdev.brightness_set = syscon_led_set;
+	}
+	sled->cdev.brightness_set = syscon_led_set;
 
-		ret = led_classdev_register(dev, &sled->cdev);
-		if (ret < 0)
-			return ret;
+	ret = led_classdev_register(dev, &sled->cdev);
+	if (ret < 0)
+		return ret;
+
+	platform_set_drvdata(pdev, sled);
+	dev_info(dev, "registered LED %s\n", sled->cdev.name);
 
-		dev_info(dev, "registered LED %s\n", sled->cdev.name);
-	}
 	return 0;
 }
 
-static int __init syscon_leds_init(void)
+static int syscon_led_remove(struct platform_device *pdev)
 {
-	struct device_node *np;
-
-	for_each_of_allnodes(np) {
-		struct platform_device *pdev;
-		struct regmap *map;
-		int ret;
+	struct syscon_led *sled = platform_get_drvdata(pdev);
 
-		if (!of_device_is_compatible(np, "syscon"))
-			continue;
+	led_classdev_unregister(&sled->cdev);
+	/* Turn it off */
+	regmap_update_bits(sled->map, sled->offset, sled->mask, 0);
+	return 0;
+}
 
-		map = syscon_node_to_regmap(np);
-		if (IS_ERR(map)) {
-			pr_err("error getting regmap for syscon LEDs\n");
-			continue;
-		}
+static const struct of_device_id of_syscon_leds_match[] = {
+	{ .compatible = "register-bit-led", },
+	{},
+};
 
-		/*
-		 * If the map is there, the device should be there, we allocate
-		 * memory on the syscon device's behalf here.
-		 */
-		pdev = of_find_device_by_node(np);
-		if (!pdev)
-			return -ENODEV;
-		ret = syscon_leds_spawn(np, &pdev->dev, map);
-		if (ret)
-			dev_err(&pdev->dev, "could not spawn syscon LEDs\n");
-	}
+MODULE_DEVICE_TABLE(of, of_syscon_leds_match);
 
-	return 0;
-}
-device_initcall(syscon_leds_init);
+static struct platform_driver syscon_led_driver = {
+	.probe		= syscon_led_probe,
+	.remove		= syscon_led_remove,
+	.driver		= {
+		.name	= "leds-syscon",
+		.of_match_table = of_syscon_leds_match,
+	},
+};
+module_platform_driver(syscon_led_driver);
diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
index 571087621827..6d74e499e18d 100644
--- a/drivers/memory/tegra/Kconfig
+++ b/drivers/memory/tegra/Kconfig
@@ -5,3 +5,13 @@ config TEGRA_MC
 	help
 	  This driver supports the Memory Controller (MC) hardware found on
 	  NVIDIA Tegra SoCs.
+
+config TEGRA124_EMC
+	bool "NVIDIA Tegra124 External Memory Controller driver"
+	default y
+	depends on TEGRA_MC && ARCH_TEGRA_124_SOC
+	help
+	  This driver is for the External Memory Controller (EMC) found on
+	  Tegra124 chips. The EMC controls the external DRAM on the board.
+	  This driver is required to change memory timings / clock rate for
+	  external memory.
diff --git a/drivers/memory/tegra/Makefile b/drivers/memory/tegra/Makefile
index 0d9f497b786c..6a0b9ac54f05 100644
--- a/drivers/memory/tegra/Makefile
+++ b/drivers/memory/tegra/Makefile
@@ -3,5 +3,8 @@ tegra-mc-y := mc.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_3x_SOC)  += tegra30.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_114_SOC) += tegra114.o
 tegra-mc-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124.o
+tegra-mc-$(CONFIG_ARCH_TEGRA_132_SOC) += tegra124.o
 
 obj-$(CONFIG_TEGRA_MC) += tegra-mc.o
+
+obj-$(CONFIG_TEGRA124_EMC) += tegra124-emc.o
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index fe3c44e7e1d1..c71ede67e6c8 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -13,6 +13,9 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
+
+#include <soc/tegra/fuse.h>
 
 #include "mc.h"
 
@@ -48,6 +51,9 @@
 #define  MC_EMEM_ARB_CFG_CYCLES_PER_UPDATE_MASK	0x1ff
 #define MC_EMEM_ARB_MISC0 0xd8
 
+#define MC_EMEM_ADR_CFG 0x54
+#define MC_EMEM_ADR_CFG_EMEM_NUMDEV BIT(0)
+
 static const struct of_device_id tegra_mc_of_match[] = {
 #ifdef CONFIG_ARCH_TEGRA_3x_SOC
 	{ .compatible = "nvidia,tegra30-mc", .data = &tegra30_mc_soc },
@@ -58,6 +64,9 @@ static const struct of_device_id tegra_mc_of_match[] = {
 #ifdef CONFIG_ARCH_TEGRA_124_SOC
 	{ .compatible = "nvidia,tegra124-mc", .data = &tegra124_mc_soc },
 #endif
+#ifdef CONFIG_ARCH_TEGRA_132_SOC
+	{ .compatible = "nvidia,tegra132-mc", .data = &tegra132_mc_soc },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(of, tegra_mc_of_match);
@@ -91,6 +100,130 @@ static int tegra_mc_setup_latency_allowance(struct tegra_mc *mc)
 	return 0;
 }
 
+void tegra_mc_write_emem_configuration(struct tegra_mc *mc, unsigned long rate)
+{
+	unsigned int i;
+	struct tegra_mc_timing *timing = NULL;
+
+	for (i = 0; i < mc->num_timings; i++) {
+		if (mc->timings[i].rate == rate) {
+			timing = &mc->timings[i];
+			break;
+		}
+	}
+
+	if (!timing) {
+		dev_err(mc->dev, "no memory timing registered for rate %lu\n",
+			rate);
+		return;
+	}
+
+	for (i = 0; i < mc->soc->num_emem_regs; ++i)
+		mc_writel(mc, timing->emem_data[i], mc->soc->emem_regs[i]);
+}
+
+unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc)
+{
+	u8 dram_count;
+
+	dram_count = mc_readl(mc, MC_EMEM_ADR_CFG);
+	dram_count &= MC_EMEM_ADR_CFG_EMEM_NUMDEV;
+	dram_count++;
+
+	return dram_count;
+}
+
+static int load_one_timing(struct tegra_mc *mc,
+			   struct tegra_mc_timing *timing,
+			   struct device_node *node)
+{
+	int err;
+	u32 tmp;
+
+	err = of_property_read_u32(node, "clock-frequency", &tmp);
+	if (err) {
+		dev_err(mc->dev,
+			"timing %s: failed to read rate\n", node->name);
+		return err;
+	}
+
+	timing->rate = tmp;
+	timing->emem_data = devm_kcalloc(mc->dev, mc->soc->num_emem_regs,
+					 sizeof(u32), GFP_KERNEL);
+	if (!timing->emem_data)
+		return -ENOMEM;
+
+	err = of_property_read_u32_array(node, "nvidia,emem-configuration",
+					 timing->emem_data,
+					 mc->soc->num_emem_regs);
+	if (err) {
+		dev_err(mc->dev,
+			"timing %s: failed to read EMEM configuration\n",
+			node->name);
+		return err;
+	}
+
+	return 0;
+}
+
+static int load_timings(struct tegra_mc *mc, struct device_node *node)
+{
+	struct device_node *child;
+	struct tegra_mc_timing *timing;
+	int child_count = of_get_child_count(node);
+	int i = 0, err;
+
+	mc->timings = devm_kcalloc(mc->dev, child_count, sizeof(*timing),
+				   GFP_KERNEL);
+	if (!mc->timings)
+		return -ENOMEM;
+
+	mc->num_timings = child_count;
+
+	for_each_child_of_node(node, child) {
+		timing = &mc->timings[i++];
+
+		err = load_one_timing(mc, timing, child);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int tegra_mc_setup_timings(struct tegra_mc *mc)
+{
+	struct device_node *node;
+	u32 ram_code, node_ram_code;
+	int err;
+
+	ram_code = tegra_read_ram_code();
+
+	mc->num_timings = 0;
+
+	for_each_child_of_node(mc->dev->of_node, node) {
+		err = of_property_read_u32(node, "nvidia,ram-code",
+					   &node_ram_code);
+		if (err || (node_ram_code != ram_code)) {
+			of_node_put(node);
+			continue;
+		}
+
+		err = load_timings(mc, node);
+		if (err)
+			return err;
+		of_node_put(node);
+		break;
+	}
+
+	if (mc->num_timings == 0)
+		dev_warn(mc->dev,
+			 "no memory timings for RAM code %u registered\n",
+			 ram_code);
+
+	return 0;
+}
+
 static const char *const status_names[32] = {
 	[ 1] = "External interrupt",
 	[ 6] = "EMEM address decode error",
@@ -248,6 +381,12 @@ static int tegra_mc_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	err = tegra_mc_setup_timings(mc);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup timings: %d\n", err);
+		return err;
+	}
+
 	if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU)) {
 		mc->smmu = tegra_smmu_probe(&pdev->dev, mc->soc->smmu, mc);
 		if (IS_ERR(mc->smmu)) {
@@ -273,8 +412,8 @@ static int tegra_mc_probe(struct platform_device *pdev)
 
 	value = MC_INT_DECERR_MTS | MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
 		MC_INT_INVALID_APB_ASID_UPDATE | MC_INT_INVALID_SMMU_PAGE |
-		MC_INT_ARBITRATION_EMEM | MC_INT_SECURITY_VIOLATION |
-		MC_INT_DECERR_EMEM;
+		MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM;
+
 	mc_writel(mc, value, MC_INTMASK);
 
 	return 0;
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index d5d21147fc77..b7361b0a6696 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -37,4 +37,8 @@ extern const struct tegra_mc_soc tegra114_mc_soc;
 extern const struct tegra_mc_soc tegra124_mc_soc;
 #endif
 
+#ifdef CONFIG_ARCH_TEGRA_132_SOC
+extern const struct tegra_mc_soc tegra132_mc_soc;
+#endif
+
 #endif /* MEMORY_TEGRA_MC_H */
diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c
index 511e9a25c151..9f579589e800 100644
--- a/drivers/memory/tegra/tegra114.c
+++ b/drivers/memory/tegra/tegra114.c
@@ -896,22 +896,22 @@ static const struct tegra_mc_client tegra114_mc_clients[] = {
 };
 
 static const struct tegra_smmu_swgroup tegra114_swgroups[] = {
-	{ .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
-	{ .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
-	{ .swgroup = TEGRA_SWGROUP_EPP,       .reg = 0x248 },
-	{ .swgroup = TEGRA_SWGROUP_G2,        .reg = 0x24c },
-	{ .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
-	{ .swgroup = TEGRA_SWGROUP_NV,        .reg = 0x268 },
-	{ .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
-	{ .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
-	{ .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
-	{ .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
-	{ .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
-	{ .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
-	{ .swgroup = TEGRA_SWGROUP_ISP,       .reg = 0x258 },
-	{ .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
-	{ .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
-	{ .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
+	{ .name = "dc",        .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
+	{ .name = "dcb",       .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
+	{ .name = "epp",       .swgroup = TEGRA_SWGROUP_EPP,       .reg = 0x248 },
+	{ .name = "g2",        .swgroup = TEGRA_SWGROUP_G2,        .reg = 0x24c },
+	{ .name = "avpc",      .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
+	{ .name = "nv",        .swgroup = TEGRA_SWGROUP_NV,        .reg = 0x268 },
+	{ .name = "hda",       .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
+	{ .name = "hc",        .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
+	{ .name = "msenc",     .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
+	{ .name = "ppcs",      .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
+	{ .name = "vde",       .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
+	{ .name = "vi",        .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
+	{ .name = "isp",       .swgroup = TEGRA_SWGROUP_ISP,       .reg = 0x258 },
+	{ .name = "xusb_host", .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
+	{ .name = "xusb_dev",  .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
+	{ .name = "tsec",      .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
 };
 
 static void tegra114_flush_dcache(struct page *page, unsigned long offset,
diff --git a/drivers/memory/tegra/tegra124-emc.c b/drivers/memory/tegra/tegra124-emc.c
new file mode 100644
index 000000000000..8620355776fe
--- /dev/null
+++ b/drivers/memory/tegra/tegra124-emc.c
@@ -0,0 +1,1140 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Author:
+ *	Mikko Perttunen <mperttunen@nvidia.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/sort.h>
+#include <linux/string.h>
+
+#include <soc/tegra/emc.h>
+#include <soc/tegra/fuse.h>
+#include <soc/tegra/mc.h>
+
+#define EMC_FBIO_CFG5				0x104
+#define	EMC_FBIO_CFG5_DRAM_TYPE_MASK		0x3
+#define	EMC_FBIO_CFG5_DRAM_TYPE_SHIFT		0
+
+#define EMC_INTSTATUS				0x0
+#define EMC_INTSTATUS_CLKCHANGE_COMPLETE	BIT(4)
+
+#define EMC_CFG					0xc
+#define EMC_CFG_DRAM_CLKSTOP_PD			BIT(31)
+#define EMC_CFG_DRAM_CLKSTOP_SR			BIT(30)
+#define EMC_CFG_DRAM_ACPD			BIT(29)
+#define EMC_CFG_DYN_SREF			BIT(28)
+#define EMC_CFG_PWR_MASK			((0xF << 28) | BIT(18))
+#define EMC_CFG_DSR_VTTGEN_DRV_EN		BIT(18)
+
+#define EMC_REFCTRL				0x20
+#define EMC_REFCTRL_DEV_SEL_SHIFT		0
+#define EMC_REFCTRL_ENABLE			BIT(31)
+
+#define EMC_TIMING_CONTROL			0x28
+#define EMC_RC					0x2c
+#define EMC_RFC					0x30
+#define EMC_RAS					0x34
+#define EMC_RP					0x38
+#define EMC_R2W					0x3c
+#define EMC_W2R					0x40
+#define EMC_R2P					0x44
+#define EMC_W2P					0x48
+#define EMC_RD_RCD				0x4c
+#define EMC_WR_RCD				0x50
+#define EMC_RRD					0x54
+#define EMC_REXT				0x58
+#define EMC_WDV					0x5c
+#define EMC_QUSE				0x60
+#define EMC_QRST				0x64
+#define EMC_QSAFE				0x68
+#define EMC_RDV					0x6c
+#define EMC_REFRESH				0x70
+#define EMC_BURST_REFRESH_NUM			0x74
+#define EMC_PDEX2WR				0x78
+#define EMC_PDEX2RD				0x7c
+#define EMC_PCHG2PDEN				0x80
+#define EMC_ACT2PDEN				0x84
+#define EMC_AR2PDEN				0x88
+#define EMC_RW2PDEN				0x8c
+#define EMC_TXSR				0x90
+#define EMC_TCKE				0x94
+#define EMC_TFAW				0x98
+#define EMC_TRPAB				0x9c
+#define EMC_TCLKSTABLE				0xa0
+#define EMC_TCLKSTOP				0xa4
+#define EMC_TREFBW				0xa8
+#define EMC_ODT_WRITE				0xb0
+#define EMC_ODT_READ				0xb4
+#define EMC_WEXT				0xb8
+#define EMC_CTT					0xbc
+#define EMC_RFC_SLR				0xc0
+#define EMC_MRS_WAIT_CNT2			0xc4
+
+#define EMC_MRS_WAIT_CNT			0xc8
+#define EMC_MRS_WAIT_CNT_SHORT_WAIT_SHIFT	0
+#define EMC_MRS_WAIT_CNT_SHORT_WAIT_MASK	\
+	(0x3FF << EMC_MRS_WAIT_CNT_SHORT_WAIT_SHIFT)
+#define EMC_MRS_WAIT_CNT_LONG_WAIT_SHIFT	16
+#define EMC_MRS_WAIT_CNT_LONG_WAIT_MASK		\
+	(0x3FF << EMC_MRS_WAIT_CNT_LONG_WAIT_SHIFT)
+
+#define EMC_MRS					0xcc
+#define EMC_MODE_SET_DLL_RESET			BIT(8)
+#define EMC_MODE_SET_LONG_CNT			BIT(26)
+#define EMC_EMRS				0xd0
+#define EMC_REF					0xd4
+#define EMC_PRE					0xd8
+
+#define EMC_SELF_REF				0xe0
+#define EMC_SELF_REF_CMD_ENABLED		BIT(0)
+#define EMC_SELF_REF_DEV_SEL_SHIFT		30
+
+#define EMC_MRW					0xe8
+
+#define EMC_MRR					0xec
+#define EMC_MRR_MA_SHIFT			16
+#define LPDDR2_MR4_TEMP_SHIFT			0
+
+#define EMC_XM2DQSPADCTRL3			0xf8
+#define EMC_FBIO_SPARE				0x100
+
+#define EMC_FBIO_CFG6				0x114
+#define EMC_EMRS2				0x12c
+#define EMC_MRW2				0x134
+#define EMC_MRW4				0x13c
+#define EMC_EINPUT				0x14c
+#define EMC_EINPUT_DURATION			0x150
+#define EMC_PUTERM_EXTRA			0x154
+#define EMC_TCKESR				0x158
+#define EMC_TPD					0x15c
+
+#define EMC_AUTO_CAL_CONFIG			0x2a4
+#define EMC_AUTO_CAL_CONFIG_AUTO_CAL_START	BIT(31)
+#define EMC_AUTO_CAL_INTERVAL			0x2a8
+#define EMC_AUTO_CAL_STATUS			0x2ac
+#define EMC_AUTO_CAL_STATUS_ACTIVE		BIT(31)
+#define EMC_STATUS				0x2b4
+#define EMC_STATUS_TIMING_UPDATE_STALLED	BIT(23)
+
+#define EMC_CFG_2				0x2b8
+#define EMC_CFG_2_MODE_SHIFT			0
+#define EMC_CFG_2_DIS_STP_OB_CLK_DURING_NON_WR	BIT(6)
+
+#define EMC_CFG_DIG_DLL				0x2bc
+#define EMC_CFG_DIG_DLL_PERIOD			0x2c0
+#define EMC_RDV_MASK				0x2cc
+#define EMC_WDV_MASK				0x2d0
+#define EMC_CTT_DURATION			0x2d8
+#define EMC_CTT_TERM_CTRL			0x2dc
+#define EMC_ZCAL_INTERVAL			0x2e0
+#define EMC_ZCAL_WAIT_CNT			0x2e4
+
+#define EMC_ZQ_CAL				0x2ec
+#define EMC_ZQ_CAL_CMD				BIT(0)
+#define EMC_ZQ_CAL_LONG				BIT(4)
+#define EMC_ZQ_CAL_LONG_CMD_DEV0		\
+	(DRAM_DEV_SEL_0 | EMC_ZQ_CAL_LONG | EMC_ZQ_CAL_CMD)
+#define EMC_ZQ_CAL_LONG_CMD_DEV1		\
+	(DRAM_DEV_SEL_1 | EMC_ZQ_CAL_LONG | EMC_ZQ_CAL_CMD)
+
+#define EMC_XM2CMDPADCTRL			0x2f0
+#define EMC_XM2DQSPADCTRL			0x2f8
+#define EMC_XM2DQSPADCTRL2			0x2fc
+#define EMC_XM2DQSPADCTRL2_RX_FT_REC_ENABLE	BIT(0)
+#define EMC_XM2DQSPADCTRL2_VREF_ENABLE		BIT(5)
+#define EMC_XM2DQPADCTRL			0x300
+#define EMC_XM2DQPADCTRL2			0x304
+#define EMC_XM2CLKPADCTRL			0x308
+#define EMC_XM2COMPPADCTRL			0x30c
+#define EMC_XM2VTTGENPADCTRL			0x310
+#define EMC_XM2VTTGENPADCTRL2			0x314
+#define EMC_XM2VTTGENPADCTRL3			0x318
+#define EMC_XM2DQSPADCTRL4			0x320
+#define EMC_DLL_XFORM_DQS0			0x328
+#define EMC_DLL_XFORM_DQS1			0x32c
+#define EMC_DLL_XFORM_DQS2			0x330
+#define EMC_DLL_XFORM_DQS3			0x334
+#define EMC_DLL_XFORM_DQS4			0x338
+#define EMC_DLL_XFORM_DQS5			0x33c
+#define EMC_DLL_XFORM_DQS6			0x340
+#define EMC_DLL_XFORM_DQS7			0x344
+#define EMC_DLL_XFORM_QUSE0			0x348
+#define EMC_DLL_XFORM_QUSE1			0x34c
+#define EMC_DLL_XFORM_QUSE2			0x350
+#define EMC_DLL_XFORM_QUSE3			0x354
+#define EMC_DLL_XFORM_QUSE4			0x358
+#define EMC_DLL_XFORM_QUSE5			0x35c
+#define EMC_DLL_XFORM_QUSE6			0x360
+#define EMC_DLL_XFORM_QUSE7			0x364
+#define EMC_DLL_XFORM_DQ0			0x368
+#define EMC_DLL_XFORM_DQ1			0x36c
+#define EMC_DLL_XFORM_DQ2			0x370
+#define EMC_DLL_XFORM_DQ3			0x374
+#define EMC_DLI_TRIM_TXDQS0			0x3a8
+#define EMC_DLI_TRIM_TXDQS1			0x3ac
+#define EMC_DLI_TRIM_TXDQS2			0x3b0
+#define EMC_DLI_TRIM_TXDQS3			0x3b4
+#define EMC_DLI_TRIM_TXDQS4			0x3b8
+#define EMC_DLI_TRIM_TXDQS5			0x3bc
+#define EMC_DLI_TRIM_TXDQS6			0x3c0
+#define EMC_DLI_TRIM_TXDQS7			0x3c4
+#define EMC_STALL_THEN_EXE_AFTER_CLKCHANGE	0x3cc
+#define EMC_SEL_DPD_CTRL			0x3d8
+#define EMC_SEL_DPD_CTRL_DATA_SEL_DPD		BIT(8)
+#define EMC_SEL_DPD_CTRL_ODT_SEL_DPD		BIT(5)
+#define EMC_SEL_DPD_CTRL_RESET_SEL_DPD		BIT(4)
+#define EMC_SEL_DPD_CTRL_CA_SEL_DPD		BIT(3)
+#define EMC_SEL_DPD_CTRL_CLK_SEL_DPD		BIT(2)
+#define EMC_SEL_DPD_CTRL_DDR3_MASK	\
+	((0xf << 2) | BIT(8))
+#define EMC_SEL_DPD_CTRL_MASK \
+	((0x3 << 2) | BIT(5) | BIT(8))
+#define EMC_PRE_REFRESH_REQ_CNT			0x3dc
+#define EMC_DYN_SELF_REF_CONTROL		0x3e0
+#define EMC_TXSRDLL				0x3e4
+#define EMC_CCFIFO_ADDR				0x3e8
+#define EMC_CCFIFO_DATA				0x3ec
+#define EMC_CCFIFO_STATUS			0x3f0
+#define EMC_CDB_CNTL_1				0x3f4
+#define EMC_CDB_CNTL_2				0x3f8
+#define EMC_XM2CLKPADCTRL2			0x3fc
+#define EMC_AUTO_CAL_CONFIG2			0x458
+#define EMC_AUTO_CAL_CONFIG3			0x45c
+#define EMC_IBDLY				0x468
+#define EMC_DLL_XFORM_ADDR0			0x46c
+#define EMC_DLL_XFORM_ADDR1			0x470
+#define EMC_DLL_XFORM_ADDR2			0x474
+#define EMC_DSR_VTTGEN_DRV			0x47c
+#define EMC_TXDSRVTTGEN				0x480
+#define EMC_XM2CMDPADCTRL4			0x484
+#define EMC_XM2CMDPADCTRL5			0x488
+#define EMC_DLL_XFORM_DQS8			0x4a0
+#define EMC_DLL_XFORM_DQS9			0x4a4
+#define EMC_DLL_XFORM_DQS10			0x4a8
+#define EMC_DLL_XFORM_DQS11			0x4ac
+#define EMC_DLL_XFORM_DQS12			0x4b0
+#define EMC_DLL_XFORM_DQS13			0x4b4
+#define EMC_DLL_XFORM_DQS14			0x4b8
+#define EMC_DLL_XFORM_DQS15			0x4bc
+#define EMC_DLL_XFORM_QUSE8			0x4c0
+#define EMC_DLL_XFORM_QUSE9			0x4c4
+#define EMC_DLL_XFORM_QUSE10			0x4c8
+#define EMC_DLL_XFORM_QUSE11			0x4cc
+#define EMC_DLL_XFORM_QUSE12			0x4d0
+#define EMC_DLL_XFORM_QUSE13			0x4d4
+#define EMC_DLL_XFORM_QUSE14			0x4d8
+#define EMC_DLL_XFORM_QUSE15			0x4dc
+#define EMC_DLL_XFORM_DQ4			0x4e0
+#define EMC_DLL_XFORM_DQ5			0x4e4
+#define EMC_DLL_XFORM_DQ6			0x4e8
+#define EMC_DLL_XFORM_DQ7			0x4ec
+#define EMC_DLI_TRIM_TXDQS8			0x520
+#define EMC_DLI_TRIM_TXDQS9			0x524
+#define EMC_DLI_TRIM_TXDQS10			0x528
+#define EMC_DLI_TRIM_TXDQS11			0x52c
+#define EMC_DLI_TRIM_TXDQS12			0x530
+#define EMC_DLI_TRIM_TXDQS13			0x534
+#define EMC_DLI_TRIM_TXDQS14			0x538
+#define EMC_DLI_TRIM_TXDQS15			0x53c
+#define EMC_CDB_CNTL_3				0x540
+#define EMC_XM2DQSPADCTRL5			0x544
+#define EMC_XM2DQSPADCTRL6			0x548
+#define EMC_XM2DQPADCTRL3			0x54c
+#define EMC_DLL_XFORM_ADDR3			0x550
+#define EMC_DLL_XFORM_ADDR4			0x554
+#define EMC_DLL_XFORM_ADDR5			0x558
+#define EMC_CFG_PIPE				0x560
+#define EMC_QPOP				0x564
+#define EMC_QUSE_WIDTH				0x568
+#define EMC_PUTERM_WIDTH			0x56c
+#define EMC_BGBIAS_CTL0				0x570
+#define EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_RX BIT(3)
+#define EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_VTTGEN BIT(2)
+#define EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD	BIT(1)
+#define EMC_PUTERM_ADJ				0x574
+
+#define DRAM_DEV_SEL_ALL			0
+#define DRAM_DEV_SEL_0				(2 << 30)
+#define DRAM_DEV_SEL_1				(1 << 30)
+
+#define EMC_CFG_POWER_FEATURES_MASK		\
+	(EMC_CFG_DYN_SREF | EMC_CFG_DRAM_ACPD | EMC_CFG_DRAM_CLKSTOP_SR | \
+	EMC_CFG_DRAM_CLKSTOP_PD | EMC_CFG_DSR_VTTGEN_DRV_EN)
+#define EMC_REFCTRL_DEV_SEL(n) (((n > 1) ? 0 : 2) << EMC_REFCTRL_DEV_SEL_SHIFT)
+#define EMC_DRAM_DEV_SEL(n) ((n > 1) ? DRAM_DEV_SEL_ALL : DRAM_DEV_SEL_0)
+
+/* Maximum amount of time in us. to wait for changes to become effective */
+#define EMC_STATUS_UPDATE_TIMEOUT		1000
+
+enum emc_dram_type {
+	DRAM_TYPE_DDR3 = 0,
+	DRAM_TYPE_DDR1 = 1,
+	DRAM_TYPE_LPDDR3 = 2,
+	DRAM_TYPE_DDR2 = 3
+};
+
+enum emc_dll_change {
+	DLL_CHANGE_NONE,
+	DLL_CHANGE_ON,
+	DLL_CHANGE_OFF
+};
+
+static const unsigned long emc_burst_regs[] = {
+	EMC_RC,
+	EMC_RFC,
+	EMC_RFC_SLR,
+	EMC_RAS,
+	EMC_RP,
+	EMC_R2W,
+	EMC_W2R,
+	EMC_R2P,
+	EMC_W2P,
+	EMC_RD_RCD,
+	EMC_WR_RCD,
+	EMC_RRD,
+	EMC_REXT,
+	EMC_WEXT,
+	EMC_WDV,
+	EMC_WDV_MASK,
+	EMC_QUSE,
+	EMC_QUSE_WIDTH,
+	EMC_IBDLY,
+	EMC_EINPUT,
+	EMC_EINPUT_DURATION,
+	EMC_PUTERM_EXTRA,
+	EMC_PUTERM_WIDTH,
+	EMC_PUTERM_ADJ,
+	EMC_CDB_CNTL_1,
+	EMC_CDB_CNTL_2,
+	EMC_CDB_CNTL_3,
+	EMC_QRST,
+	EMC_QSAFE,
+	EMC_RDV,
+	EMC_RDV_MASK,
+	EMC_REFRESH,
+	EMC_BURST_REFRESH_NUM,
+	EMC_PRE_REFRESH_REQ_CNT,
+	EMC_PDEX2WR,
+	EMC_PDEX2RD,
+	EMC_PCHG2PDEN,
+	EMC_ACT2PDEN,
+	EMC_AR2PDEN,
+	EMC_RW2PDEN,
+	EMC_TXSR,
+	EMC_TXSRDLL,
+	EMC_TCKE,
+	EMC_TCKESR,
+	EMC_TPD,
+	EMC_TFAW,
+	EMC_TRPAB,
+	EMC_TCLKSTABLE,
+	EMC_TCLKSTOP,
+	EMC_TREFBW,
+	EMC_FBIO_CFG6,
+	EMC_ODT_WRITE,
+	EMC_ODT_READ,
+	EMC_FBIO_CFG5,
+	EMC_CFG_DIG_DLL,
+	EMC_CFG_DIG_DLL_PERIOD,
+	EMC_DLL_XFORM_DQS0,
+	EMC_DLL_XFORM_DQS1,
+	EMC_DLL_XFORM_DQS2,
+	EMC_DLL_XFORM_DQS3,
+	EMC_DLL_XFORM_DQS4,
+	EMC_DLL_XFORM_DQS5,
+	EMC_DLL_XFORM_DQS6,
+	EMC_DLL_XFORM_DQS7,
+	EMC_DLL_XFORM_DQS8,
+	EMC_DLL_XFORM_DQS9,
+	EMC_DLL_XFORM_DQS10,
+	EMC_DLL_XFORM_DQS11,
+	EMC_DLL_XFORM_DQS12,
+	EMC_DLL_XFORM_DQS13,
+	EMC_DLL_XFORM_DQS14,
+	EMC_DLL_XFORM_DQS15,
+	EMC_DLL_XFORM_QUSE0,
+	EMC_DLL_XFORM_QUSE1,
+	EMC_DLL_XFORM_QUSE2,
+	EMC_DLL_XFORM_QUSE3,
+	EMC_DLL_XFORM_QUSE4,
+	EMC_DLL_XFORM_QUSE5,
+	EMC_DLL_XFORM_QUSE6,
+	EMC_DLL_XFORM_QUSE7,
+	EMC_DLL_XFORM_ADDR0,
+	EMC_DLL_XFORM_ADDR1,
+	EMC_DLL_XFORM_ADDR2,
+	EMC_DLL_XFORM_ADDR3,
+	EMC_DLL_XFORM_ADDR4,
+	EMC_DLL_XFORM_ADDR5,
+	EMC_DLL_XFORM_QUSE8,
+	EMC_DLL_XFORM_QUSE9,
+	EMC_DLL_XFORM_QUSE10,
+	EMC_DLL_XFORM_QUSE11,
+	EMC_DLL_XFORM_QUSE12,
+	EMC_DLL_XFORM_QUSE13,
+	EMC_DLL_XFORM_QUSE14,
+	EMC_DLL_XFORM_QUSE15,
+	EMC_DLI_TRIM_TXDQS0,
+	EMC_DLI_TRIM_TXDQS1,
+	EMC_DLI_TRIM_TXDQS2,
+	EMC_DLI_TRIM_TXDQS3,
+	EMC_DLI_TRIM_TXDQS4,
+	EMC_DLI_TRIM_TXDQS5,
+	EMC_DLI_TRIM_TXDQS6,
+	EMC_DLI_TRIM_TXDQS7,
+	EMC_DLI_TRIM_TXDQS8,
+	EMC_DLI_TRIM_TXDQS9,
+	EMC_DLI_TRIM_TXDQS10,
+	EMC_DLI_TRIM_TXDQS11,
+	EMC_DLI_TRIM_TXDQS12,
+	EMC_DLI_TRIM_TXDQS13,
+	EMC_DLI_TRIM_TXDQS14,
+	EMC_DLI_TRIM_TXDQS15,
+	EMC_DLL_XFORM_DQ0,
+	EMC_DLL_XFORM_DQ1,
+	EMC_DLL_XFORM_DQ2,
+	EMC_DLL_XFORM_DQ3,
+	EMC_DLL_XFORM_DQ4,
+	EMC_DLL_XFORM_DQ5,
+	EMC_DLL_XFORM_DQ6,
+	EMC_DLL_XFORM_DQ7,
+	EMC_XM2CMDPADCTRL,
+	EMC_XM2CMDPADCTRL4,
+	EMC_XM2CMDPADCTRL5,
+	EMC_XM2DQPADCTRL2,
+	EMC_XM2DQPADCTRL3,
+	EMC_XM2CLKPADCTRL,
+	EMC_XM2CLKPADCTRL2,
+	EMC_XM2COMPPADCTRL,
+	EMC_XM2VTTGENPADCTRL,
+	EMC_XM2VTTGENPADCTRL2,
+	EMC_XM2VTTGENPADCTRL3,
+	EMC_XM2DQSPADCTRL3,
+	EMC_XM2DQSPADCTRL4,
+	EMC_XM2DQSPADCTRL5,
+	EMC_XM2DQSPADCTRL6,
+	EMC_DSR_VTTGEN_DRV,
+	EMC_TXDSRVTTGEN,
+	EMC_FBIO_SPARE,
+	EMC_ZCAL_WAIT_CNT,
+	EMC_MRS_WAIT_CNT2,
+	EMC_CTT,
+	EMC_CTT_DURATION,
+	EMC_CFG_PIPE,
+	EMC_DYN_SELF_REF_CONTROL,
+	EMC_QPOP
+};
+
+struct emc_timing {
+	unsigned long rate;
+
+	u32 emc_burst_data[ARRAY_SIZE(emc_burst_regs)];
+
+	u32 emc_auto_cal_config;
+	u32 emc_auto_cal_config2;
+	u32 emc_auto_cal_config3;
+	u32 emc_auto_cal_interval;
+	u32 emc_bgbias_ctl0;
+	u32 emc_cfg;
+	u32 emc_cfg_2;
+	u32 emc_ctt_term_ctrl;
+	u32 emc_mode_1;
+	u32 emc_mode_2;
+	u32 emc_mode_4;
+	u32 emc_mode_reset;
+	u32 emc_mrs_wait_cnt;
+	u32 emc_sel_dpd_ctrl;
+	u32 emc_xm2dqspadctrl2;
+	u32 emc_zcal_cnt_long;
+	u32 emc_zcal_interval;
+};
+
+struct tegra_emc {
+	struct device *dev;
+
+	struct tegra_mc *mc;
+
+	void __iomem *regs;
+
+	enum emc_dram_type dram_type;
+	unsigned int dram_num;
+
+	struct emc_timing last_timing;
+	struct emc_timing *timings;
+	unsigned int num_timings;
+};
+
+/* Timing change sequence functions */
+
+static void emc_ccfifo_writel(struct tegra_emc *emc, u32 value,
+			      unsigned long offset)
+{
+	writel(value, emc->regs + EMC_CCFIFO_DATA);
+	writel(offset, emc->regs + EMC_CCFIFO_ADDR);
+}
+
+static void emc_seq_update_timing(struct tegra_emc *emc)
+{
+	unsigned int i;
+	u32 value;
+
+	writel(1, emc->regs + EMC_TIMING_CONTROL);
+
+	for (i = 0; i < EMC_STATUS_UPDATE_TIMEOUT; ++i) {
+		value = readl(emc->regs + EMC_STATUS);
+		if ((value & EMC_STATUS_TIMING_UPDATE_STALLED) == 0)
+			return;
+		udelay(1);
+	}
+
+	dev_err(emc->dev, "timing update timed out\n");
+}
+
+static void emc_seq_disable_auto_cal(struct tegra_emc *emc)
+{
+	unsigned int i;
+	u32 value;
+
+	writel(0, emc->regs + EMC_AUTO_CAL_INTERVAL);
+
+	for (i = 0; i < EMC_STATUS_UPDATE_TIMEOUT; ++i) {
+		value = readl(emc->regs + EMC_AUTO_CAL_STATUS);
+		if ((value & EMC_AUTO_CAL_STATUS_ACTIVE) == 0)
+			return;
+		udelay(1);
+	}
+
+	dev_err(emc->dev, "auto cal disable timed out\n");
+}
+
+static void emc_seq_wait_clkchange(struct tegra_emc *emc)
+{
+	unsigned int i;
+	u32 value;
+
+	for (i = 0; i < EMC_STATUS_UPDATE_TIMEOUT; ++i) {
+		value = readl(emc->regs + EMC_INTSTATUS);
+		if (value & EMC_INTSTATUS_CLKCHANGE_COMPLETE)
+			return;
+		udelay(1);
+	}
+
+	dev_err(emc->dev, "clock change timed out\n");
+}
+
+static struct emc_timing *tegra_emc_find_timing(struct tegra_emc *emc,
+						unsigned long rate)
+{
+	struct emc_timing *timing = NULL;
+	unsigned int i;
+
+	for (i = 0; i < emc->num_timings; i++) {
+		if (emc->timings[i].rate == rate) {
+			timing = &emc->timings[i];
+			break;
+		}
+	}
+
+	if (!timing) {
+		dev_err(emc->dev, "no timing for rate %lu\n", rate);
+		return NULL;
+	}
+
+	return timing;
+}
+
+int tegra_emc_prepare_timing_change(struct tegra_emc *emc,
+				    unsigned long rate)
+{
+	struct emc_timing *timing = tegra_emc_find_timing(emc, rate);
+	struct emc_timing *last = &emc->last_timing;
+	enum emc_dll_change dll_change;
+	unsigned int pre_wait = 0;
+	u32 val, val2, mask;
+	bool update = false;
+	unsigned int i;
+
+	if (!timing)
+		return -ENOENT;
+
+	if ((last->emc_mode_1 & 0x1) == (timing->emc_mode_1 & 0x1))
+		dll_change = DLL_CHANGE_NONE;
+	else if (timing->emc_mode_1 & 0x1)
+		dll_change = DLL_CHANGE_ON;
+	else
+		dll_change = DLL_CHANGE_OFF;
+
+	/* Clear CLKCHANGE_COMPLETE interrupts */
+	writel(EMC_INTSTATUS_CLKCHANGE_COMPLETE, emc->regs + EMC_INTSTATUS);
+
+	/* Disable dynamic self-refresh */
+	val = readl(emc->regs + EMC_CFG);
+	if (val & EMC_CFG_PWR_MASK) {
+		val &= ~EMC_CFG_POWER_FEATURES_MASK;
+		writel(val, emc->regs + EMC_CFG);
+
+		pre_wait = 5;
+	}
+
+	/* Disable SEL_DPD_CTRL for clock change */
+	if (emc->dram_type == DRAM_TYPE_DDR3)
+		mask = EMC_SEL_DPD_CTRL_DDR3_MASK;
+	else
+		mask = EMC_SEL_DPD_CTRL_MASK;
+
+	val = readl(emc->regs + EMC_SEL_DPD_CTRL);
+	if (val & mask) {
+		val &= ~mask;
+		writel(val, emc->regs + EMC_SEL_DPD_CTRL);
+	}
+
+	/* Prepare DQ/DQS for clock change */
+	val = readl(emc->regs + EMC_BGBIAS_CTL0);
+	val2 = last->emc_bgbias_ctl0;
+	if (!(timing->emc_bgbias_ctl0 &
+	      EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_RX) &&
+	    (val & EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_RX)) {
+		val2 &= ~EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_RX;
+		update = true;
+	}
+
+	if ((val & EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD) ||
+	    (val & EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_VTTGEN)) {
+		update = true;
+	}
+
+	if (update) {
+		writel(val2, emc->regs + EMC_BGBIAS_CTL0);
+		if (pre_wait < 5)
+			pre_wait = 5;
+	}
+
+	update = false;
+	val = readl(emc->regs + EMC_XM2DQSPADCTRL2);
+	if (timing->emc_xm2dqspadctrl2 & EMC_XM2DQSPADCTRL2_VREF_ENABLE &&
+	    !(val & EMC_XM2DQSPADCTRL2_VREF_ENABLE)) {
+		val |= EMC_XM2DQSPADCTRL2_VREF_ENABLE;
+		update = true;
+	}
+
+	if (timing->emc_xm2dqspadctrl2 & EMC_XM2DQSPADCTRL2_RX_FT_REC_ENABLE &&
+	    !(val & EMC_XM2DQSPADCTRL2_RX_FT_REC_ENABLE)) {
+		val |= EMC_XM2DQSPADCTRL2_RX_FT_REC_ENABLE;
+		update = true;
+	}
+
+	if (update) {
+		writel(val, emc->regs + EMC_XM2DQSPADCTRL2);
+		if (pre_wait < 30)
+			pre_wait = 30;
+	}
+
+	/* Wait to settle */
+	if (pre_wait) {
+		emc_seq_update_timing(emc);
+		udelay(pre_wait);
+	}
+
+	/* Program CTT_TERM control */
+	if (last->emc_ctt_term_ctrl != timing->emc_ctt_term_ctrl) {
+		emc_seq_disable_auto_cal(emc);
+		writel(timing->emc_ctt_term_ctrl,
+		       emc->regs + EMC_CTT_TERM_CTRL);
+		emc_seq_update_timing(emc);
+	}
+
+	/* Program burst shadow registers */
+	for (i = 0; i < ARRAY_SIZE(timing->emc_burst_data); ++i)
+		writel(timing->emc_burst_data[i],
+		       emc->regs + emc_burst_regs[i]);
+
+	writel(timing->emc_xm2dqspadctrl2, emc->regs + EMC_XM2DQSPADCTRL2);
+	writel(timing->emc_zcal_interval, emc->regs + EMC_ZCAL_INTERVAL);
+
+	tegra_mc_write_emem_configuration(emc->mc, timing->rate);
+
+	val = timing->emc_cfg & ~EMC_CFG_POWER_FEATURES_MASK;
+	emc_ccfifo_writel(emc, val, EMC_CFG);
+
+	/* Program AUTO_CAL_CONFIG */
+	if (timing->emc_auto_cal_config2 != last->emc_auto_cal_config2)
+		emc_ccfifo_writel(emc, timing->emc_auto_cal_config2,
+				  EMC_AUTO_CAL_CONFIG2);
+
+	if (timing->emc_auto_cal_config3 != last->emc_auto_cal_config3)
+		emc_ccfifo_writel(emc, timing->emc_auto_cal_config3,
+				  EMC_AUTO_CAL_CONFIG3);
+
+	if (timing->emc_auto_cal_config != last->emc_auto_cal_config) {
+		val = timing->emc_auto_cal_config;
+		val &= EMC_AUTO_CAL_CONFIG_AUTO_CAL_START;
+		emc_ccfifo_writel(emc, val, EMC_AUTO_CAL_CONFIG);
+	}
+
+	/* DDR3: predict MRS long wait count */
+	if (emc->dram_type == DRAM_TYPE_DDR3 &&
+	    dll_change == DLL_CHANGE_ON) {
+		u32 cnt = 512;
+
+		if (timing->emc_zcal_interval != 0 &&
+		    last->emc_zcal_interval == 0)
+			cnt -= emc->dram_num * 256;
+
+		val = (timing->emc_mrs_wait_cnt
+			& EMC_MRS_WAIT_CNT_SHORT_WAIT_MASK)
+			>> EMC_MRS_WAIT_CNT_SHORT_WAIT_SHIFT;
+		if (cnt < val)
+			cnt = val;
+
+		val = timing->emc_mrs_wait_cnt
+			& ~EMC_MRS_WAIT_CNT_LONG_WAIT_MASK;
+		val |= (cnt << EMC_MRS_WAIT_CNT_LONG_WAIT_SHIFT)
+			& EMC_MRS_WAIT_CNT_LONG_WAIT_MASK;
+
+		writel(val, emc->regs + EMC_MRS_WAIT_CNT);
+	}
+
+	val = timing->emc_cfg_2;
+	val &= ~EMC_CFG_2_DIS_STP_OB_CLK_DURING_NON_WR;
+	emc_ccfifo_writel(emc, val, EMC_CFG_2);
+
+	/* DDR3: Turn off DLL and enter self-refresh */
+	if (emc->dram_type == DRAM_TYPE_DDR3 && dll_change == DLL_CHANGE_OFF)
+		emc_ccfifo_writel(emc, timing->emc_mode_1, EMC_EMRS);
+
+	/* Disable refresh controller */
+	emc_ccfifo_writel(emc, EMC_REFCTRL_DEV_SEL(emc->dram_num),
+			  EMC_REFCTRL);
+	if (emc->dram_type == DRAM_TYPE_DDR3)
+		emc_ccfifo_writel(emc, EMC_DRAM_DEV_SEL(emc->dram_num) |
+				       EMC_SELF_REF_CMD_ENABLED,
+				  EMC_SELF_REF);
+
+	/* Flow control marker */
+	emc_ccfifo_writel(emc, 1, EMC_STALL_THEN_EXE_AFTER_CLKCHANGE);
+
+	/* DDR3: Exit self-refresh */
+	if (emc->dram_type == DRAM_TYPE_DDR3)
+		emc_ccfifo_writel(emc, EMC_DRAM_DEV_SEL(emc->dram_num),
+				  EMC_SELF_REF);
+	emc_ccfifo_writel(emc, EMC_REFCTRL_DEV_SEL(emc->dram_num) |
+			       EMC_REFCTRL_ENABLE,
+			  EMC_REFCTRL);
+
+	/* Set DRAM mode registers */
+	if (emc->dram_type == DRAM_TYPE_DDR3) {
+		if (timing->emc_mode_1 != last->emc_mode_1)
+			emc_ccfifo_writel(emc, timing->emc_mode_1, EMC_EMRS);
+		if (timing->emc_mode_2 != last->emc_mode_2)
+			emc_ccfifo_writel(emc, timing->emc_mode_2, EMC_EMRS2);
+
+		if ((timing->emc_mode_reset != last->emc_mode_reset) ||
+		    dll_change == DLL_CHANGE_ON) {
+			val = timing->emc_mode_reset;
+			if (dll_change == DLL_CHANGE_ON) {
+				val |= EMC_MODE_SET_DLL_RESET;
+				val |= EMC_MODE_SET_LONG_CNT;
+			} else {
+				val &= ~EMC_MODE_SET_DLL_RESET;
+			}
+			emc_ccfifo_writel(emc, val, EMC_MRS);
+		}
+	} else {
+		if (timing->emc_mode_2 != last->emc_mode_2)
+			emc_ccfifo_writel(emc, timing->emc_mode_2, EMC_MRW2);
+		if (timing->emc_mode_1 != last->emc_mode_1)
+			emc_ccfifo_writel(emc, timing->emc_mode_1, EMC_MRW);
+		if (timing->emc_mode_4 != last->emc_mode_4)
+			emc_ccfifo_writel(emc, timing->emc_mode_4, EMC_MRW4);
+	}
+
+	/*  Issue ZCAL command if turning ZCAL on */
+	if (timing->emc_zcal_interval != 0 && last->emc_zcal_interval == 0) {
+		emc_ccfifo_writel(emc, EMC_ZQ_CAL_LONG_CMD_DEV0, EMC_ZQ_CAL);
+		if (emc->dram_num > 1)
+			emc_ccfifo_writel(emc, EMC_ZQ_CAL_LONG_CMD_DEV1,
+					  EMC_ZQ_CAL);
+	}
+
+	/*  Write to RO register to remove stall after change */
+	emc_ccfifo_writel(emc, 0, EMC_CCFIFO_STATUS);
+
+	if (timing->emc_cfg_2 & EMC_CFG_2_DIS_STP_OB_CLK_DURING_NON_WR)
+		emc_ccfifo_writel(emc, timing->emc_cfg_2, EMC_CFG_2);
+
+	/* Disable AUTO_CAL for clock change */
+	emc_seq_disable_auto_cal(emc);
+
+	/* Read register to wait until programming has settled */
+	readl(emc->regs + EMC_INTSTATUS);
+
+	return 0;
+}
+
+void tegra_emc_complete_timing_change(struct tegra_emc *emc,
+				      unsigned long rate)
+{
+	struct emc_timing *timing = tegra_emc_find_timing(emc, rate);
+	struct emc_timing *last = &emc->last_timing;
+	u32 val;
+
+	if (!timing)
+		return;
+
+	/* Wait until the state machine has settled */
+	emc_seq_wait_clkchange(emc);
+
+	/* Restore AUTO_CAL */
+	if (timing->emc_ctt_term_ctrl != last->emc_ctt_term_ctrl)
+		writel(timing->emc_auto_cal_interval,
+		       emc->regs + EMC_AUTO_CAL_INTERVAL);
+
+	/* Restore dynamic self-refresh */
+	if (timing->emc_cfg & EMC_CFG_PWR_MASK)
+		writel(timing->emc_cfg, emc->regs + EMC_CFG);
+
+	/* Set ZCAL wait count */
+	writel(timing->emc_zcal_cnt_long, emc->regs + EMC_ZCAL_WAIT_CNT);
+
+	/* LPDDR3: Turn off BGBIAS if low frequency */
+	if (emc->dram_type == DRAM_TYPE_LPDDR3 &&
+	    timing->emc_bgbias_ctl0 &
+	      EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_RX) {
+		val = timing->emc_bgbias_ctl0;
+		val |= EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD_IBIAS_VTTGEN;
+		val |= EMC_BGBIAS_CTL0_BIAS0_DSC_E_PWRD;
+		writel(val, emc->regs + EMC_BGBIAS_CTL0);
+	} else {
+		if (emc->dram_type == DRAM_TYPE_DDR3 &&
+		    readl(emc->regs + EMC_BGBIAS_CTL0) !=
+		      timing->emc_bgbias_ctl0) {
+			writel(timing->emc_bgbias_ctl0,
+			       emc->regs + EMC_BGBIAS_CTL0);
+		}
+
+		writel(timing->emc_auto_cal_interval,
+		       emc->regs + EMC_AUTO_CAL_INTERVAL);
+	}
+
+	/* Wait for timing to settle */
+	udelay(2);
+
+	/* Reprogram SEL_DPD_CTRL */
+	writel(timing->emc_sel_dpd_ctrl, emc->regs + EMC_SEL_DPD_CTRL);
+	emc_seq_update_timing(emc);
+
+	emc->last_timing = *timing;
+}
+
+/* Initialization and deinitialization */
+
+static void emc_read_current_timing(struct tegra_emc *emc,
+				    struct emc_timing *timing)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(emc_burst_regs); ++i)
+		timing->emc_burst_data[i] =
+			readl(emc->regs + emc_burst_regs[i]);
+
+	timing->emc_cfg = readl(emc->regs + EMC_CFG);
+
+	timing->emc_auto_cal_interval = 0;
+	timing->emc_zcal_cnt_long = 0;
+	timing->emc_mode_1 = 0;
+	timing->emc_mode_2 = 0;
+	timing->emc_mode_4 = 0;
+	timing->emc_mode_reset = 0;
+}
+
+static int emc_init(struct tegra_emc *emc)
+{
+	emc->dram_type = readl(emc->regs + EMC_FBIO_CFG5);
+	emc->dram_type &= EMC_FBIO_CFG5_DRAM_TYPE_MASK;
+	emc->dram_type >>= EMC_FBIO_CFG5_DRAM_TYPE_SHIFT;
+
+	emc->dram_num = tegra_mc_get_emem_device_count(emc->mc);
+
+	emc_read_current_timing(emc, &emc->last_timing);
+
+	return 0;
+}
+
+static int load_one_timing_from_dt(struct tegra_emc *emc,
+				   struct emc_timing *timing,
+				   struct device_node *node)
+{
+	u32 value;
+	int err;
+
+	err = of_property_read_u32(node, "clock-frequency", &value);
+	if (err) {
+		dev_err(emc->dev, "timing %s: failed to read rate: %d\n",
+			node->name, err);
+		return err;
+	}
+
+	timing->rate = value;
+
+	err = of_property_read_u32_array(node, "nvidia,emc-configuration",
+					 timing->emc_burst_data,
+					 ARRAY_SIZE(timing->emc_burst_data));
+	if (err) {
+		dev_err(emc->dev,
+			"timing %s: failed to read emc burst data: %d\n",
+			node->name, err);
+		return err;
+	}
+
+#define EMC_READ_PROP(prop, dtprop) { \
+	err = of_property_read_u32(node, dtprop, &timing->prop); \
+	if (err) { \
+		dev_err(emc->dev, "timing %s: failed to read " #prop ": %d\n", \
+			node->name, err); \
+		return err; \
+	} \
+}
+
+	EMC_READ_PROP(emc_auto_cal_config, "nvidia,emc-auto-cal-config")
+	EMC_READ_PROP(emc_auto_cal_config2, "nvidia,emc-auto-cal-config2")
+	EMC_READ_PROP(emc_auto_cal_config3, "nvidia,emc-auto-cal-config3")
+	EMC_READ_PROP(emc_auto_cal_interval, "nvidia,emc-auto-cal-interval")
+	EMC_READ_PROP(emc_bgbias_ctl0, "nvidia,emc-bgbias-ctl0")
+	EMC_READ_PROP(emc_cfg, "nvidia,emc-cfg")
+	EMC_READ_PROP(emc_cfg_2, "nvidia,emc-cfg-2")
+	EMC_READ_PROP(emc_ctt_term_ctrl, "nvidia,emc-ctt-term-ctrl")
+	EMC_READ_PROP(emc_mode_1, "nvidia,emc-mode-1")
+	EMC_READ_PROP(emc_mode_2, "nvidia,emc-mode-2")
+	EMC_READ_PROP(emc_mode_4, "nvidia,emc-mode-4")
+	EMC_READ_PROP(emc_mode_reset, "nvidia,emc-mode-reset")
+	EMC_READ_PROP(emc_mrs_wait_cnt, "nvidia,emc-mrs-wait-cnt")
+	EMC_READ_PROP(emc_sel_dpd_ctrl, "nvidia,emc-sel-dpd-ctrl")
+	EMC_READ_PROP(emc_xm2dqspadctrl2, "nvidia,emc-xm2dqspadctrl2")
+	EMC_READ_PROP(emc_zcal_cnt_long, "nvidia,emc-zcal-cnt-long")
+	EMC_READ_PROP(emc_zcal_interval, "nvidia,emc-zcal-interval")
+
+#undef EMC_READ_PROP
+
+	return 0;
+}
+
+static int cmp_timings(const void *_a, const void *_b)
+{
+	const struct emc_timing *a = _a;
+	const struct emc_timing *b = _b;
+
+	if (a->rate < b->rate)
+		return -1;
+	else if (a->rate == b->rate)
+		return 0;
+	else
+		return 1;
+}
+
+static int tegra_emc_load_timings_from_dt(struct tegra_emc *emc,
+					  struct device_node *node)
+{
+	int child_count = of_get_child_count(node);
+	struct device_node *child;
+	struct emc_timing *timing;
+	unsigned int i = 0;
+	int err;
+
+	emc->timings = devm_kcalloc(emc->dev, child_count, sizeof(*timing),
+				    GFP_KERNEL);
+	if (!emc->timings)
+		return -ENOMEM;
+
+	emc->num_timings = child_count;
+
+	for_each_child_of_node(node, child) {
+		timing = &emc->timings[i++];
+
+		err = load_one_timing_from_dt(emc, timing, child);
+		if (err)
+			return err;
+	}
+
+	sort(emc->timings, emc->num_timings, sizeof(*timing), cmp_timings,
+	     NULL);
+
+	return 0;
+}
+
+static const struct of_device_id tegra_emc_of_match[] = {
+	{ .compatible = "nvidia,tegra124-emc" },
+	{}
+};
+
+static struct device_node *
+tegra_emc_find_node_by_ram_code(struct device_node *node, u32 ram_code)
+{
+	struct device_node *np;
+	int err;
+
+	for_each_child_of_node(node, np) {
+		u32 value;
+
+		err = of_property_read_u32(np, "nvidia,ram-code", &value);
+		if (err || (value != ram_code)) {
+			of_node_put(np);
+			continue;
+		}
+
+		return np;
+	}
+
+	return NULL;
+}
+
+/* Debugfs entry */
+
+static int emc_debug_rate_get(void *data, u64 *rate)
+{
+	struct clk *c = data;
+
+	*rate = clk_get_rate(c);
+
+	return 0;
+}
+
+static int emc_debug_rate_set(void *data, u64 rate)
+{
+	struct clk *c = data;
+
+	return clk_set_rate(c, rate);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(emc_debug_rate_fops, emc_debug_rate_get,
+			emc_debug_rate_set, "%lld\n");
+
+static void emc_debugfs_init(struct device *dev)
+{
+	struct dentry *root, *file;
+	struct clk *clk;
+
+	root = debugfs_create_dir("emc", NULL);
+	if (!root) {
+		dev_err(dev, "failed to create debugfs directory\n");
+		return;
+	}
+
+	clk = clk_get_sys("tegra-clk-debug", "emc");
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to get debug clock: %ld\n", PTR_ERR(clk));
+		return;
+	}
+
+	file = debugfs_create_file("rate", S_IRUGO | S_IWUSR, root, clk,
+				   &emc_debug_rate_fops);
+	if (!file)
+		dev_err(dev, "failed to create debugfs entry\n");
+}
+
+static int tegra_emc_probe(struct platform_device *pdev)
+{
+	struct platform_device *mc;
+	struct device_node *np;
+	struct tegra_emc *emc;
+	struct resource *res;
+	u32 ram_code;
+	int err;
+
+	emc = devm_kzalloc(&pdev->dev, sizeof(*emc), GFP_KERNEL);
+	if (!emc)
+		return -ENOMEM;
+
+	emc->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	emc->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(emc->regs))
+		return PTR_ERR(emc->regs);
+
+	np = of_parse_phandle(pdev->dev.of_node, "nvidia,memory-controller", 0);
+	if (!np) {
+		dev_err(&pdev->dev, "could not get memory controller\n");
+		return -ENOENT;
+	}
+
+	mc = of_find_device_by_node(np);
+	if (!mc)
+		return -ENOENT;
+
+	of_node_put(np);
+
+	emc->mc = platform_get_drvdata(mc);
+	if (!emc->mc)
+		return -EPROBE_DEFER;
+
+	ram_code = tegra_read_ram_code();
+
+	np = tegra_emc_find_node_by_ram_code(pdev->dev.of_node, ram_code);
+	if (!np) {
+		dev_err(&pdev->dev,
+			"no memory timings for RAM code %u found in DT\n",
+			ram_code);
+		return -ENOENT;
+	}
+
+	err = tegra_emc_load_timings_from_dt(emc, np);
+
+	of_node_put(np);
+
+	if (err)
+		return err;
+
+	if (emc->num_timings == 0) {
+		dev_err(&pdev->dev,
+			"no memory timings for RAM code %u registered\n",
+			ram_code);
+		return -ENOENT;
+	}
+
+	err = emc_init(emc);
+	if (err) {
+		dev_err(&pdev->dev, "EMC initialization failed: %d\n", err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, emc);
+
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		emc_debugfs_init(&pdev->dev);
+
+	return 0;
+};
+
+static struct platform_driver tegra_emc_driver = {
+	.probe = tegra_emc_probe,
+	.driver = {
+		.name = "tegra-emc",
+		.of_match_table = tegra_emc_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+static int tegra_emc_init(void)
+{
+	return platform_driver_register(&tegra_emc_driver);
+}
+subsys_initcall(tegra_emc_init);
diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c
index 278d40b854c1..966e1557e6f4 100644
--- a/drivers/memory/tegra/tegra124.c
+++ b/drivers/memory/tegra/tegra124.c
@@ -15,6 +15,48 @@
 
 #include "mc.h"
 
+#define MC_EMEM_ARB_CFG				0x90
+#define MC_EMEM_ARB_OUTSTANDING_REQ		0x94
+#define MC_EMEM_ARB_TIMING_RCD			0x98
+#define MC_EMEM_ARB_TIMING_RP			0x9c
+#define MC_EMEM_ARB_TIMING_RC			0xa0
+#define MC_EMEM_ARB_TIMING_RAS			0xa4
+#define MC_EMEM_ARB_TIMING_FAW			0xa8
+#define MC_EMEM_ARB_TIMING_RRD			0xac
+#define MC_EMEM_ARB_TIMING_RAP2PRE		0xb0
+#define MC_EMEM_ARB_TIMING_WAP2PRE		0xb4
+#define MC_EMEM_ARB_TIMING_R2R			0xb8
+#define MC_EMEM_ARB_TIMING_W2W			0xbc
+#define MC_EMEM_ARB_TIMING_R2W			0xc0
+#define MC_EMEM_ARB_TIMING_W2R			0xc4
+#define MC_EMEM_ARB_DA_TURNS			0xd0
+#define MC_EMEM_ARB_DA_COVERS			0xd4
+#define MC_EMEM_ARB_MISC0			0xd8
+#define MC_EMEM_ARB_MISC1			0xdc
+#define MC_EMEM_ARB_RING1_THROTTLE		0xe0
+
+static const unsigned long tegra124_mc_emem_regs[] = {
+	MC_EMEM_ARB_CFG,
+	MC_EMEM_ARB_OUTSTANDING_REQ,
+	MC_EMEM_ARB_TIMING_RCD,
+	MC_EMEM_ARB_TIMING_RP,
+	MC_EMEM_ARB_TIMING_RC,
+	MC_EMEM_ARB_TIMING_RAS,
+	MC_EMEM_ARB_TIMING_FAW,
+	MC_EMEM_ARB_TIMING_RRD,
+	MC_EMEM_ARB_TIMING_RAP2PRE,
+	MC_EMEM_ARB_TIMING_WAP2PRE,
+	MC_EMEM_ARB_TIMING_R2R,
+	MC_EMEM_ARB_TIMING_W2W,
+	MC_EMEM_ARB_TIMING_R2W,
+	MC_EMEM_ARB_TIMING_W2R,
+	MC_EMEM_ARB_DA_TURNS,
+	MC_EMEM_ARB_DA_COVERS,
+	MC_EMEM_ARB_MISC0,
+	MC_EMEM_ARB_MISC1,
+	MC_EMEM_ARB_RING1_THROTTLE
+};
+
 static const struct tegra_mc_client tegra124_mc_clients[] = {
 	{
 		.id = 0x00,
@@ -934,29 +976,29 @@ static const struct tegra_mc_client tegra124_mc_clients[] = {
 };
 
 static const struct tegra_smmu_swgroup tegra124_swgroups[] = {
-	{ .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
-	{ .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
-	{ .swgroup = TEGRA_SWGROUP_AFI,       .reg = 0x238 },
-	{ .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
-	{ .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
-	{ .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
-	{ .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
-	{ .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
-	{ .swgroup = TEGRA_SWGROUP_SATA,      .reg = 0x274 },
-	{ .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
-	{ .swgroup = TEGRA_SWGROUP_ISP2,      .reg = 0x258 },
-	{ .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
-	{ .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
-	{ .swgroup = TEGRA_SWGROUP_ISP2B,     .reg = 0xaa4 },
-	{ .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
-	{ .swgroup = TEGRA_SWGROUP_A9AVP,     .reg = 0x290 },
-	{ .swgroup = TEGRA_SWGROUP_GPU,       .reg = 0xaac },
-	{ .swgroup = TEGRA_SWGROUP_SDMMC1A,   .reg = 0xa94 },
-	{ .swgroup = TEGRA_SWGROUP_SDMMC2A,   .reg = 0xa98 },
-	{ .swgroup = TEGRA_SWGROUP_SDMMC3A,   .reg = 0xa9c },
-	{ .swgroup = TEGRA_SWGROUP_SDMMC4A,   .reg = 0xaa0 },
-	{ .swgroup = TEGRA_SWGROUP_VIC,       .reg = 0x284 },
-	{ .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
+	{ .name = "dc",        .swgroup = TEGRA_SWGROUP_DC,        .reg = 0x240 },
+	{ .name = "dcb",       .swgroup = TEGRA_SWGROUP_DCB,       .reg = 0x244 },
+	{ .name = "afi",       .swgroup = TEGRA_SWGROUP_AFI,       .reg = 0x238 },
+	{ .name = "avpc",      .swgroup = TEGRA_SWGROUP_AVPC,      .reg = 0x23c },
+	{ .name = "hda",       .swgroup = TEGRA_SWGROUP_HDA,       .reg = 0x254 },
+	{ .name = "hc",        .swgroup = TEGRA_SWGROUP_HC,        .reg = 0x250 },
+	{ .name = "msenc",     .swgroup = TEGRA_SWGROUP_MSENC,     .reg = 0x264 },
+	{ .name = "ppcs",      .swgroup = TEGRA_SWGROUP_PPCS,      .reg = 0x270 },
+	{ .name = "sata",      .swgroup = TEGRA_SWGROUP_SATA,      .reg = 0x274 },
+	{ .name = "vde",       .swgroup = TEGRA_SWGROUP_VDE,       .reg = 0x27c },
+	{ .name = "isp2",      .swgroup = TEGRA_SWGROUP_ISP2,      .reg = 0x258 },
+	{ .name = "xusb_host", .swgroup = TEGRA_SWGROUP_XUSB_HOST, .reg = 0x288 },
+	{ .name = "xusb_dev",  .swgroup = TEGRA_SWGROUP_XUSB_DEV,  .reg = 0x28c },
+	{ .name = "isp2b",     .swgroup = TEGRA_SWGROUP_ISP2B,     .reg = 0xaa4 },
+	{ .name = "tsec",      .swgroup = TEGRA_SWGROUP_TSEC,      .reg = 0x294 },
+	{ .name = "a9avp",     .swgroup = TEGRA_SWGROUP_A9AVP,     .reg = 0x290 },
+	{ .name = "gpu",       .swgroup = TEGRA_SWGROUP_GPU,       .reg = 0xaac },
+	{ .name = "sdmmc1a",   .swgroup = TEGRA_SWGROUP_SDMMC1A,   .reg = 0xa94 },
+	{ .name = "sdmmc2a",   .swgroup = TEGRA_SWGROUP_SDMMC2A,   .reg = 0xa98 },
+	{ .name = "sdmmc3a",   .swgroup = TEGRA_SWGROUP_SDMMC3A,   .reg = 0xa9c },
+	{ .name = "sdmmc4a",   .swgroup = TEGRA_SWGROUP_SDMMC4A,   .reg = 0xaa0 },
+	{ .name = "vic",       .swgroup = TEGRA_SWGROUP_VIC,       .reg = 0x284 },
+	{ .name = "vi",        .swgroup = TEGRA_SWGROUP_VI,        .reg = 0x280 },
 };
 
 #ifdef CONFIG_ARCH_TEGRA_124_SOC
@@ -991,5 +1033,40 @@ const struct tegra_mc_soc tegra124_mc_soc = {
 	.num_address_bits = 34,
 	.atom_size = 32,
 	.smmu = &tegra124_smmu_soc,
+	.emem_regs = tegra124_mc_emem_regs,
+	.num_emem_regs = ARRAY_SIZE(tegra124_mc_emem_regs),
 };
 #endif /* CONFIG_ARCH_TEGRA_124_SOC */
+
+#ifdef CONFIG_ARCH_TEGRA_132_SOC
+static void tegra132_flush_dcache(struct page *page, unsigned long offset,
+				  size_t size)
+{
+	void *virt = page_address(page) + offset;
+
+	__flush_dcache_area(virt, size);
+}
+
+static const struct tegra_smmu_ops tegra132_smmu_ops = {
+	.flush_dcache = tegra132_flush_dcache,
+};
+
+static const struct tegra_smmu_soc tegra132_smmu_soc = {
+	.clients = tegra124_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra124_mc_clients),
+	.swgroups = tegra124_swgroups,
+	.num_swgroups = ARRAY_SIZE(tegra124_swgroups),
+	.supports_round_robin_arbitration = true,
+	.supports_request_limit = true,
+	.num_asids = 128,
+	.ops = &tegra132_smmu_ops,
+};
+
+const struct tegra_mc_soc tegra132_mc_soc = {
+	.clients = tegra124_mc_clients,
+	.num_clients = ARRAY_SIZE(tegra124_mc_clients),
+	.num_address_bits = 34,
+	.atom_size = 32,
+	.smmu = &tegra132_smmu_soc,
+};
+#endif /* CONFIG_ARCH_TEGRA_132_SOC */
diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c
index 71fe9376fe53..1abcd8f6f3ba 100644
--- a/drivers/memory/tegra/tegra30.c
+++ b/drivers/memory/tegra/tegra30.c
@@ -918,22 +918,22 @@ static const struct tegra_mc_client tegra30_mc_clients[] = {
 };
 
 static const struct tegra_smmu_swgroup tegra30_swgroups[] = {
-	{ .swgroup = TEGRA_SWGROUP_DC,   .reg = 0x240 },
-	{ .swgroup = TEGRA_SWGROUP_DCB,  .reg = 0x244 },
-	{ .swgroup = TEGRA_SWGROUP_EPP,  .reg = 0x248 },
-	{ .swgroup = TEGRA_SWGROUP_G2,   .reg = 0x24c },
-	{ .swgroup = TEGRA_SWGROUP_MPE,  .reg = 0x264 },
-	{ .swgroup = TEGRA_SWGROUP_VI,   .reg = 0x280 },
-	{ .swgroup = TEGRA_SWGROUP_AFI,  .reg = 0x238 },
-	{ .swgroup = TEGRA_SWGROUP_AVPC, .reg = 0x23c },
-	{ .swgroup = TEGRA_SWGROUP_NV,   .reg = 0x268 },
-	{ .swgroup = TEGRA_SWGROUP_NV2,  .reg = 0x26c },
-	{ .swgroup = TEGRA_SWGROUP_HDA,  .reg = 0x254 },
-	{ .swgroup = TEGRA_SWGROUP_HC,   .reg = 0x250 },
-	{ .swgroup = TEGRA_SWGROUP_PPCS, .reg = 0x270 },
-	{ .swgroup = TEGRA_SWGROUP_SATA, .reg = 0x278 },
-	{ .swgroup = TEGRA_SWGROUP_VDE,  .reg = 0x27c },
-	{ .swgroup = TEGRA_SWGROUP_ISP,  .reg = 0x258 },
+	{ .name = "dc",   .swgroup = TEGRA_SWGROUP_DC,   .reg = 0x240 },
+	{ .name = "dcb",  .swgroup = TEGRA_SWGROUP_DCB,  .reg = 0x244 },
+	{ .name = "epp",  .swgroup = TEGRA_SWGROUP_EPP,  .reg = 0x248 },
+	{ .name = "g2",   .swgroup = TEGRA_SWGROUP_G2,   .reg = 0x24c },
+	{ .name = "mpe",  .swgroup = TEGRA_SWGROUP_MPE,  .reg = 0x264 },
+	{ .name = "vi",   .swgroup = TEGRA_SWGROUP_VI,   .reg = 0x280 },
+	{ .name = "afi",  .swgroup = TEGRA_SWGROUP_AFI,  .reg = 0x238 },
+	{ .name = "avpc", .swgroup = TEGRA_SWGROUP_AVPC, .reg = 0x23c },
+	{ .name = "nv",   .swgroup = TEGRA_SWGROUP_NV,   .reg = 0x268 },
+	{ .name = "nv2",  .swgroup = TEGRA_SWGROUP_NV2,  .reg = 0x26c },
+	{ .name = "hda",  .swgroup = TEGRA_SWGROUP_HDA,  .reg = 0x254 },
+	{ .name = "hc",   .swgroup = TEGRA_SWGROUP_HC,   .reg = 0x250 },
+	{ .name = "ppcs", .swgroup = TEGRA_SWGROUP_PPCS, .reg = 0x270 },
+	{ .name = "sata", .swgroup = TEGRA_SWGROUP_SATA, .reg = 0x278 },
+	{ .name = "vde",  .swgroup = TEGRA_SWGROUP_VDE,  .reg = 0x27c },
+	{ .name = "isp",  .swgroup = TEGRA_SWGROUP_ISP,  .reg = 0x258 },
 };
 
 static void tegra30_flush_dcache(struct page *page, unsigned long offset,
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index a01f57c9e34e..ddf8e42c9367 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -25,6 +25,7 @@
 
 const struct of_device_id of_default_bus_match_table[] = {
 	{ .compatible = "simple-bus", },
+	{ .compatible = "simple-mfd", },
 #ifdef CONFIG_ARM_AMBA
 	{ .compatible = "arm,amba-bus", },
 #endif /* CONFIG_ARM_AMBA */
diff --git a/drivers/pinctrl/berlin/berlin-bg2.c b/drivers/pinctrl/berlin/berlin-bg2.c
index b71a6fffef1b..3769eaedf519 100644
--- a/drivers/pinctrl/berlin/berlin-bg2.c
+++ b/drivers/pinctrl/berlin/berlin-bg2.c
@@ -218,11 +218,11 @@ static const struct berlin_pinctrl_desc berlin2_sysmgr_pinctrl_data = {
 
 static const struct of_device_id berlin2_pinctrl_match[] = {
 	{
-		.compatible = "marvell,berlin2-chip-ctrl",
+		.compatible = "marvell,berlin2-soc-pinctrl",
 		.data = &berlin2_soc_pinctrl_data
 	},
 	{
-		.compatible = "marvell,berlin2-system-ctrl",
+		.compatible = "marvell,berlin2-system-pinctrl",
 		.data = &berlin2_sysmgr_pinctrl_data
 	},
 	{}
@@ -233,28 +233,6 @@ static int berlin2_pinctrl_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match =
 		of_match_device(berlin2_pinctrl_match, &pdev->dev);
-	struct regmap_config *rmconfig;
-	struct regmap *regmap;
-	struct resource *res;
-	void __iomem *base;
-
-	rmconfig = devm_kzalloc(&pdev->dev, sizeof(*rmconfig), GFP_KERNEL);
-	if (!rmconfig)
-		return -ENOMEM;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	rmconfig->reg_bits = 32,
-	rmconfig->val_bits = 32,
-	rmconfig->reg_stride = 4,
-	rmconfig->max_register = resource_size(res);
-
-	regmap = devm_regmap_init_mmio(&pdev->dev, base, rmconfig);
-	if (IS_ERR(regmap))
-		return PTR_ERR(regmap);
 
 	return berlin_pinctrl_probe(pdev, match->data);
 }
diff --git a/drivers/pinctrl/berlin/berlin-bg2cd.c b/drivers/pinctrl/berlin/berlin-bg2cd.c
index 19ac5a22c947..9e11f191d643 100644
--- a/drivers/pinctrl/berlin/berlin-bg2cd.c
+++ b/drivers/pinctrl/berlin/berlin-bg2cd.c
@@ -161,11 +161,11 @@ static const struct berlin_pinctrl_desc berlin2cd_sysmgr_pinctrl_data = {
 
 static const struct of_device_id berlin2cd_pinctrl_match[] = {
 	{
-		.compatible = "marvell,berlin2cd-chip-ctrl",
+		.compatible = "marvell,berlin2cd-soc-pinctrl",
 		.data = &berlin2cd_soc_pinctrl_data
 	},
 	{
-		.compatible = "marvell,berlin2cd-system-ctrl",
+		.compatible = "marvell,berlin2cd-system-pinctrl",
 		.data = &berlin2cd_sysmgr_pinctrl_data
 	},
 	{}
@@ -176,28 +176,6 @@ static int berlin2cd_pinctrl_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match =
 		of_match_device(berlin2cd_pinctrl_match, &pdev->dev);
-	struct regmap_config *rmconfig;
-	struct regmap *regmap;
-	struct resource *res;
-	void __iomem *base;
-
-	rmconfig = devm_kzalloc(&pdev->dev, sizeof(*rmconfig), GFP_KERNEL);
-	if (!rmconfig)
-		return -ENOMEM;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	rmconfig->reg_bits = 32,
-	rmconfig->val_bits = 32,
-	rmconfig->reg_stride = 4,
-	rmconfig->max_register = resource_size(res);
-
-	regmap = devm_regmap_init_mmio(&pdev->dev, base, rmconfig);
-	if (IS_ERR(regmap))
-		return PTR_ERR(regmap);
 
 	return berlin_pinctrl_probe(pdev, match->data);
 }
diff --git a/drivers/pinctrl/berlin/berlin-bg2q.c b/drivers/pinctrl/berlin/berlin-bg2q.c
index bd9662e57ad3..ba7a8a8ad010 100644
--- a/drivers/pinctrl/berlin/berlin-bg2q.c
+++ b/drivers/pinctrl/berlin/berlin-bg2q.c
@@ -380,11 +380,11 @@ static const struct berlin_pinctrl_desc berlin2q_sysmgr_pinctrl_data = {
 
 static const struct of_device_id berlin2q_pinctrl_match[] = {
 	{
-		.compatible = "marvell,berlin2q-chip-ctrl",
+		.compatible = "marvell,berlin2q-soc-pinctrl",
 		.data = &berlin2q_soc_pinctrl_data,
 	},
 	{
-		.compatible = "marvell,berlin2q-system-ctrl",
+		.compatible = "marvell,berlin2q-system-pinctrl",
 		.data = &berlin2q_sysmgr_pinctrl_data,
 	},
 	{}
@@ -395,28 +395,6 @@ static int berlin2q_pinctrl_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match =
 		of_match_device(berlin2q_pinctrl_match, &pdev->dev);
-	struct regmap_config *rmconfig;
-	struct regmap *regmap;
-	struct resource *res;
-	void __iomem *base;
-
-	rmconfig = devm_kzalloc(&pdev->dev, sizeof(*rmconfig), GFP_KERNEL);
-	if (!rmconfig)
-		return -ENOMEM;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	rmconfig->reg_bits = 32,
-	rmconfig->val_bits = 32,
-	rmconfig->reg_stride = 4,
-	rmconfig->max_register = resource_size(res);
-
-	regmap = devm_regmap_init_mmio(&pdev->dev, base, rmconfig);
-	if (IS_ERR(regmap))
-		return PTR_ERR(regmap);
 
 	return berlin_pinctrl_probe(pdev, match->data);
 }
diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c
index 7f0b0f93242b..65b0e211b89e 100644
--- a/drivers/pinctrl/berlin/berlin.c
+++ b/drivers/pinctrl/berlin/berlin.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -295,13 +296,15 @@ int berlin_pinctrl_probe(struct platform_device *pdev,
 			 const struct berlin_pinctrl_desc *desc)
 {
 	struct device *dev = &pdev->dev;
+	struct device_node *parent_np = of_get_parent(dev->of_node);
 	struct berlin_pinctrl *pctrl;
 	struct regmap *regmap;
 	int ret;
 
-	regmap = dev_get_regmap(&pdev->dev, NULL);
-	if (!regmap)
-		return -ENODEV;
+	regmap = syscon_node_to_regmap(parent_np);
+	of_node_put(parent_np);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
 
 	pctrl = devm_kzalloc(dev, sizeof(*pctrl), GFP_KERNEL);
 	if (!pctrl)
diff --git a/drivers/reset/reset-berlin.c b/drivers/reset/reset-berlin.c
index f8b48a13cf0b..3c922d37255c 100644
--- a/drivers/reset/reset-berlin.c
+++ b/drivers/reset/reset-berlin.c
@@ -11,10 +11,12 @@
 
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/reset-controller.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -25,8 +27,7 @@
 	container_of((p), struct berlin_reset_priv, rcdev)
 
 struct berlin_reset_priv {
-	void __iomem			*base;
-	unsigned int			size;
+	struct regmap			*regmap;
 	struct reset_controller_dev	rcdev;
 };
 
@@ -37,7 +38,7 @@ static int berlin_reset_reset(struct reset_controller_dev *rcdev,
 	int offset = id >> 8;
 	int mask = BIT(id & 0x1f);
 
-	writel(mask, priv->base + offset);
+	regmap_write(priv->regmap, offset, mask);
 
 	/* let the reset be effective */
 	udelay(10);
@@ -52,7 +53,6 @@ static struct reset_control_ops berlin_reset_ops = {
 static int berlin_reset_xlate(struct reset_controller_dev *rcdev,
 			      const struct of_phandle_args *reset_spec)
 {
-	struct berlin_reset_priv *priv = to_berlin_reset_priv(rcdev);
 	unsigned offset, bit;
 
 	if (WARN_ON(reset_spec->args_count != rcdev->of_reset_n_cells))
@@ -61,71 +61,53 @@ static int berlin_reset_xlate(struct reset_controller_dev *rcdev,
 	offset = reset_spec->args[0];
 	bit = reset_spec->args[1];
 
-	if (offset >= priv->size)
-		return -EINVAL;
-
 	if (bit >= BERLIN_MAX_RESETS)
 		return -EINVAL;
 
 	return (offset << 8) | bit;
 }
 
-static int __berlin_reset_init(struct device_node *np)
+static int berlin2_reset_probe(struct platform_device *pdev)
 {
+	struct device_node *parent_np = of_get_parent(pdev->dev.of_node);
 	struct berlin_reset_priv *priv;
-	struct resource res;
-	resource_size_t size;
-	int ret;
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret)
-		goto err;
-
-	size = resource_size(&res);
-	priv->base = ioremap(res.start, size);
-	if (!priv->base) {
-		ret = -ENOMEM;
-		goto err;
-	}
-	priv->size = size;
+	priv->regmap = syscon_node_to_regmap(parent_np);
+	of_node_put(parent_np);
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
 
 	priv->rcdev.owner = THIS_MODULE;
 	priv->rcdev.ops = &berlin_reset_ops;
-	priv->rcdev.of_node = np;
+	priv->rcdev.of_node = pdev->dev.of_node;
 	priv->rcdev.of_reset_n_cells = 2;
 	priv->rcdev.of_xlate = berlin_reset_xlate;
 
 	reset_controller_register(&priv->rcdev);
 
 	return 0;
-
-err:
-	kfree(priv);
-	return ret;
 }
 
-static const struct of_device_id berlin_reset_of_match[] __initconst = {
-	{ .compatible = "marvell,berlin2-chip-ctrl" },
-	{ .compatible = "marvell,berlin2cd-chip-ctrl" },
-	{ .compatible = "marvell,berlin2q-chip-ctrl" },
+static const struct of_device_id berlin_reset_dt_match[] = {
+	{ .compatible = "marvell,berlin2-reset" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, berlin_reset_dt_match);
+
+static struct platform_driver berlin_reset_driver = {
+	.probe	= berlin2_reset_probe,
+	.driver	= {
+		.name = "berlin2-reset",
+		.of_match_table = berlin_reset_dt_match,
+	},
+};
+module_platform_driver(berlin_reset_driver);
 
-static int __init berlin_reset_init(void)
-{
-	struct device_node *np;
-	int ret;
-
-	for_each_matching_node(np, berlin_reset_of_match) {
-		ret = __berlin_reset_init(np);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-arch_initcall(berlin_reset_init);
+MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
+MODULE_AUTHOR("Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>");
+MODULE_DESCRIPTION("Marvell Berlin reset driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index d8bde82f0370..96ddecb92254 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -2,6 +2,7 @@ menu "SOC (System On Chip) specific Drivers"
 
 source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
+source "drivers/soc/sunxi/Kconfig"
 source "drivers/soc/ti/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 70042b259744..7dc7c0d8a2c1 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_ARCH_MEDIATEK)	+= mediatek/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
+obj-$(CONFIG_ARCH_SUNXI)	+= sunxi/
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
 obj-$(CONFIG_SOC_TI)		+= ti/
 obj-$(CONFIG_PLAT_VERSATILE)	+= versatile/
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 460b2dba109c..5eea374c8fa6 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -10,3 +10,10 @@ config QCOM_GSBI
           functions for connecting the underlying serial UART, SPI, and I2C
           devices to the output pins.
 
+config QCOM_PM
+	bool "Qualcomm Power Management"
+	depends on ARCH_QCOM && !ARM64
+	help
+	  QCOM Platform specific power driver to manage cores and L2 low power
+	  modes. It interface with various system drivers to put the cores in
+	  low power modes.
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 438901257ac1..931d385386c5 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_QCOM_GSBI)	+=	qcom_gsbi.o
+obj-$(CONFIG_QCOM_PM)	+=	spm.o
diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
new file mode 100644
index 000000000000..b562af816c0a
--- /dev/null
+++ b/drivers/soc/qcom/spm.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2011-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2015, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/qcom_scm.h>
+
+#include <asm/cpuidle.h>
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+
+#define MAX_PMIC_DATA		2
+#define MAX_SEQ_DATA		64
+#define SPM_CTL_INDEX		0x7f
+#define SPM_CTL_INDEX_SHIFT	4
+#define SPM_CTL_EN		BIT(0)
+
+enum pm_sleep_mode {
+	PM_SLEEP_MODE_STBY,
+	PM_SLEEP_MODE_RET,
+	PM_SLEEP_MODE_SPC,
+	PM_SLEEP_MODE_PC,
+	PM_SLEEP_MODE_NR,
+};
+
+enum spm_reg {
+	SPM_REG_CFG,
+	SPM_REG_SPM_CTL,
+	SPM_REG_DLY,
+	SPM_REG_PMIC_DLY,
+	SPM_REG_PMIC_DATA_0,
+	SPM_REG_PMIC_DATA_1,
+	SPM_REG_VCTL,
+	SPM_REG_SEQ_ENTRY,
+	SPM_REG_SPM_STS,
+	SPM_REG_PMIC_STS,
+	SPM_REG_NR,
+};
+
+struct spm_reg_data {
+	const u8 *reg_offset;
+	u32 spm_cfg;
+	u32 spm_dly;
+	u32 pmic_dly;
+	u32 pmic_data[MAX_PMIC_DATA];
+	u8 seq[MAX_SEQ_DATA];
+	u8 start_index[PM_SLEEP_MODE_NR];
+};
+
+struct spm_driver_data {
+	void __iomem *reg_base;
+	const struct spm_reg_data *reg_data;
+};
+
+static const u8 spm_reg_offset_v2_1[SPM_REG_NR] = {
+	[SPM_REG_CFG]		= 0x08,
+	[SPM_REG_SPM_CTL]	= 0x30,
+	[SPM_REG_DLY]		= 0x34,
+	[SPM_REG_SEQ_ENTRY]	= 0x80,
+};
+
+/* SPM register data for 8974, 8084 */
+static const struct spm_reg_data spm_reg_8974_8084_cpu  = {
+	.reg_offset = spm_reg_offset_v2_1,
+	.spm_cfg = 0x1,
+	.spm_dly = 0x3C102800,
+	.seq = { 0x03, 0x0B, 0x0F, 0x00, 0x20, 0x80, 0x10, 0xE8, 0x5B, 0x03,
+		0x3B, 0xE8, 0x5B, 0x82, 0x10, 0x0B, 0x30, 0x06, 0x26, 0x30,
+		0x0F },
+	.start_index[PM_SLEEP_MODE_STBY] = 0,
+	.start_index[PM_SLEEP_MODE_SPC] = 3,
+};
+
+static const u8 spm_reg_offset_v1_1[SPM_REG_NR] = {
+	[SPM_REG_CFG]		= 0x08,
+	[SPM_REG_SPM_CTL]	= 0x20,
+	[SPM_REG_PMIC_DLY]	= 0x24,
+	[SPM_REG_PMIC_DATA_0]	= 0x28,
+	[SPM_REG_PMIC_DATA_1]	= 0x2C,
+	[SPM_REG_SEQ_ENTRY]	= 0x80,
+};
+
+/* SPM register data for 8064 */
+static const struct spm_reg_data spm_reg_8064_cpu = {
+	.reg_offset = spm_reg_offset_v1_1,
+	.spm_cfg = 0x1F,
+	.pmic_dly = 0x02020004,
+	.pmic_data[0] = 0x0084009C,
+	.pmic_data[1] = 0x00A4001C,
+	.seq = { 0x03, 0x0F, 0x00, 0x24, 0x54, 0x10, 0x09, 0x03, 0x01,
+		0x10, 0x54, 0x30, 0x0C, 0x24, 0x30, 0x0F },
+	.start_index[PM_SLEEP_MODE_STBY] = 0,
+	.start_index[PM_SLEEP_MODE_SPC] = 2,
+};
+
+static DEFINE_PER_CPU(struct spm_driver_data *, cpu_spm_drv);
+
+typedef int (*idle_fn)(int);
+static DEFINE_PER_CPU(idle_fn*, qcom_idle_ops);
+
+static inline void spm_register_write(struct spm_driver_data *drv,
+					enum spm_reg reg, u32 val)
+{
+	if (drv->reg_data->reg_offset[reg])
+		writel_relaxed(val, drv->reg_base +
+				drv->reg_data->reg_offset[reg]);
+}
+
+/* Ensure a guaranteed write, before return */
+static inline void spm_register_write_sync(struct spm_driver_data *drv,
+					enum spm_reg reg, u32 val)
+{
+	u32 ret;
+
+	if (!drv->reg_data->reg_offset[reg])
+		return;
+
+	do {
+		writel_relaxed(val, drv->reg_base +
+				drv->reg_data->reg_offset[reg]);
+		ret = readl_relaxed(drv->reg_base +
+				drv->reg_data->reg_offset[reg]);
+		if (ret == val)
+			break;
+		cpu_relax();
+	} while (1);
+}
+
+static inline u32 spm_register_read(struct spm_driver_data *drv,
+					enum spm_reg reg)
+{
+	return readl_relaxed(drv->reg_base + drv->reg_data->reg_offset[reg]);
+}
+
+static void spm_set_low_power_mode(struct spm_driver_data *drv,
+					enum pm_sleep_mode mode)
+{
+	u32 start_index;
+	u32 ctl_val;
+
+	start_index = drv->reg_data->start_index[mode];
+
+	ctl_val = spm_register_read(drv, SPM_REG_SPM_CTL);
+	ctl_val &= ~(SPM_CTL_INDEX << SPM_CTL_INDEX_SHIFT);
+	ctl_val |= start_index << SPM_CTL_INDEX_SHIFT;
+	ctl_val |= SPM_CTL_EN;
+	spm_register_write_sync(drv, SPM_REG_SPM_CTL, ctl_val);
+}
+
+static int qcom_pm_collapse(unsigned long int unused)
+{
+	qcom_scm_cpu_power_down(QCOM_SCM_CPU_PWR_DOWN_L2_ON);
+
+	/*
+	 * Returns here only if there was a pending interrupt and we did not
+	 * power down as a result.
+	 */
+	return -1;
+}
+
+static int qcom_cpu_spc(int cpu)
+{
+	int ret;
+	struct spm_driver_data *drv = per_cpu(cpu_spm_drv, cpu);
+
+	spm_set_low_power_mode(drv, PM_SLEEP_MODE_SPC);
+	ret = cpu_suspend(0, qcom_pm_collapse);
+	/*
+	 * ARM common code executes WFI without calling into our driver and
+	 * if the SPM mode is not reset, then we may accidently power down the
+	 * cpu when we intended only to gate the cpu clock.
+	 * Ensure the state is set to standby before returning.
+	 */
+	spm_set_low_power_mode(drv, PM_SLEEP_MODE_STBY);
+
+	return ret;
+}
+
+static int qcom_idle_enter(int cpu, unsigned long index)
+{
+	return per_cpu(qcom_idle_ops, cpu)[index](cpu);
+}
+
+static const struct of_device_id qcom_idle_state_match[] __initconst = {
+	{ .compatible = "qcom,idle-state-spc", .data = qcom_cpu_spc },
+	{ },
+};
+
+static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu)
+{
+	const struct of_device_id *match_id;
+	struct device_node *state_node;
+	int i;
+	int state_count = 1;
+	idle_fn idle_fns[CPUIDLE_STATE_MAX];
+	idle_fn *fns;
+	cpumask_t mask;
+	bool use_scm_power_down = false;
+
+	for (i = 0; ; i++) {
+		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+		if (!state_node)
+			break;
+
+		if (!of_device_is_available(state_node))
+			continue;
+
+		if (i == CPUIDLE_STATE_MAX) {
+			pr_warn("%s: cpuidle states reached max possible\n",
+					__func__);
+			break;
+		}
+
+		match_id = of_match_node(qcom_idle_state_match, state_node);
+		if (!match_id)
+			return -ENODEV;
+
+		idle_fns[state_count] = match_id->data;
+
+		/* Check if any of the states allow power down */
+		if (match_id->data == qcom_cpu_spc)
+			use_scm_power_down = true;
+
+		state_count++;
+	}
+
+	if (state_count == 1)
+		goto check_spm;
+
+	fns = devm_kcalloc(get_cpu_device(cpu), state_count, sizeof(*fns),
+			GFP_KERNEL);
+	if (!fns)
+		return -ENOMEM;
+
+	for (i = 1; i < state_count; i++)
+		fns[i] = idle_fns[i];
+
+	if (use_scm_power_down) {
+		/* We have atleast one power down mode */
+		cpumask_clear(&mask);
+		cpumask_set_cpu(cpu, &mask);
+		qcom_scm_set_warm_boot_addr(cpu_resume, &mask);
+	}
+
+	per_cpu(qcom_idle_ops, cpu) = fns;
+
+	/*
+	 * SPM probe for the cpu should have happened by now, if the
+	 * SPM device does not exist, return -ENXIO to indicate that the
+	 * cpu does not support idle states.
+	 */
+check_spm:
+	return per_cpu(cpu_spm_drv, cpu) ? 0 : -ENXIO;
+}
+
+static struct cpuidle_ops qcom_cpuidle_ops __initdata = {
+	.suspend = qcom_idle_enter,
+	.init = qcom_cpuidle_init,
+};
+
+CPUIDLE_METHOD_OF_DECLARE(qcom_idle_v1, "qcom,kpss-acc-v1", &qcom_cpuidle_ops);
+CPUIDLE_METHOD_OF_DECLARE(qcom_idle_v2, "qcom,kpss-acc-v2", &qcom_cpuidle_ops);
+
+static struct spm_driver_data *spm_get_drv(struct platform_device *pdev,
+		int *spm_cpu)
+{
+	struct spm_driver_data *drv = NULL;
+	struct device_node *cpu_node, *saw_node;
+	int cpu;
+	bool found;
+
+	for_each_possible_cpu(cpu) {
+		cpu_node = of_cpu_device_node_get(cpu);
+		if (!cpu_node)
+			continue;
+		saw_node = of_parse_phandle(cpu_node, "qcom,saw", 0);
+		found = (saw_node == pdev->dev.of_node);
+		of_node_put(saw_node);
+		of_node_put(cpu_node);
+		if (found)
+			break;
+	}
+
+	if (found) {
+		drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
+		if (drv)
+			*spm_cpu = cpu;
+	}
+
+	return drv;
+}
+
+static const struct of_device_id spm_match_table[] = {
+	{ .compatible = "qcom,msm8974-saw2-v2.1-cpu",
+	  .data = &spm_reg_8974_8084_cpu },
+	{ .compatible = "qcom,apq8084-saw2-v2.1-cpu",
+	  .data = &spm_reg_8974_8084_cpu },
+	{ .compatible = "qcom,apq8064-saw2-v1.1-cpu",
+	  .data = &spm_reg_8064_cpu },
+	{ },
+};
+
+static int spm_dev_probe(struct platform_device *pdev)
+{
+	struct spm_driver_data *drv;
+	struct resource *res;
+	const struct of_device_id *match_id;
+	void __iomem *addr;
+	int cpu;
+
+	drv = spm_get_drv(pdev, &cpu);
+	if (!drv)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	drv->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(drv->reg_base))
+		return PTR_ERR(drv->reg_base);
+
+	match_id = of_match_node(spm_match_table, pdev->dev.of_node);
+	if (!match_id)
+		return -ENODEV;
+
+	drv->reg_data = match_id->data;
+
+	/* Write the SPM sequences first.. */
+	addr = drv->reg_base + drv->reg_data->reg_offset[SPM_REG_SEQ_ENTRY];
+	__iowrite32_copy(addr, drv->reg_data->seq,
+			ARRAY_SIZE(drv->reg_data->seq) / 4);
+
+	/*
+	 * ..and then the control registers.
+	 * On some SoC if the control registers are written first and if the
+	 * CPU was held in reset, the reset signal could trigger the SPM state
+	 * machine, before the sequences are completely written.
+	 */
+	spm_register_write(drv, SPM_REG_CFG, drv->reg_data->spm_cfg);
+	spm_register_write(drv, SPM_REG_DLY, drv->reg_data->spm_dly);
+	spm_register_write(drv, SPM_REG_PMIC_DLY, drv->reg_data->pmic_dly);
+	spm_register_write(drv, SPM_REG_PMIC_DATA_0,
+				drv->reg_data->pmic_data[0]);
+	spm_register_write(drv, SPM_REG_PMIC_DATA_1,
+				drv->reg_data->pmic_data[1]);
+
+	/* Set up Standby as the default low power mode */
+	spm_set_low_power_mode(drv, PM_SLEEP_MODE_STBY);
+
+	per_cpu(cpu_spm_drv, cpu) = drv;
+
+	return 0;
+}
+
+static struct platform_driver spm_driver = {
+	.probe = spm_dev_probe,
+	.driver = {
+		.name = "saw",
+		.of_match_table = spm_match_table,
+	},
+};
+module_platform_driver(spm_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SAW power controller driver");
+MODULE_ALIAS("platform:saw");
diff --git a/drivers/soc/sunxi/Kconfig b/drivers/soc/sunxi/Kconfig
new file mode 100644
index 000000000000..353b07e40176
--- /dev/null
+++ b/drivers/soc/sunxi/Kconfig
@@ -0,0 +1,10 @@
+#
+# Allwinner sunXi SoC drivers
+#
+config SUNXI_SRAM
+	bool
+	default ARCH_SUNXI
+	help
+	  Say y here to enable the SRAM controller support. This
+	  device is responsible on mapping the SRAM in the sunXi SoCs
+	  whether to the CPU/DMA, or to the devices.
diff --git a/drivers/soc/sunxi/Makefile b/drivers/soc/sunxi/Makefile
new file mode 100644
index 000000000000..4cf9dbdf346e
--- /dev/null
+++ b/drivers/soc/sunxi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SUNXI_SRAM) +=	sunxi_sram.o
diff --git a/drivers/soc/sunxi/sunxi_sram.c b/drivers/soc/sunxi/sunxi_sram.c
new file mode 100644
index 000000000000..bc52670c8f4b
--- /dev/null
+++ b/drivers/soc/sunxi/sunxi_sram.c
@@ -0,0 +1,284 @@
+/*
+ * Allwinner SoCs SRAM Controller Driver
+ *
+ * Copyright (C) 2015 Maxime Ripard
+ *
+ * Author: Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <linux/soc/sunxi/sunxi_sram.h>
+
+struct sunxi_sram_func {
+	char	*func;
+	u8	val;
+};
+
+struct sunxi_sram_data {
+	char			*name;
+	u8			reg;
+	u8			offset;
+	u8			width;
+	struct sunxi_sram_func	*func;
+	struct list_head	list;
+};
+
+struct sunxi_sram_desc {
+	struct sunxi_sram_data	data;
+	bool			claimed;
+};
+
+#define SUNXI_SRAM_MAP(_val, _func)				\
+	{							\
+		.func = _func,					\
+		.val = _val,					\
+	}
+
+#define SUNXI_SRAM_DATA(_name, _reg, _off, _width, ...)		\
+	{							\
+		.name = _name,					\
+		.reg = _reg,					\
+		.offset = _off,					\
+		.width = _width,				\
+		.func = (struct sunxi_sram_func[]){		\
+			__VA_ARGS__, { } },			\
+	}
+
+static struct sunxi_sram_desc sun4i_a10_sram_a3_a4 = {
+	.data	= SUNXI_SRAM_DATA("A3-A4", 0x4, 0x4, 2,
+				  SUNXI_SRAM_MAP(0, "cpu"),
+				  SUNXI_SRAM_MAP(1, "emac")),
+};
+
+static struct sunxi_sram_desc sun4i_a10_sram_d = {
+	.data	= SUNXI_SRAM_DATA("D", 0x4, 0x0, 1,
+				  SUNXI_SRAM_MAP(0, "cpu"),
+				  SUNXI_SRAM_MAP(1, "usb-otg")),
+};
+
+static const struct of_device_id sunxi_sram_dt_ids[] = {
+	{
+		.compatible	= "allwinner,sun4i-a10-sram-a3-a4",
+		.data		= &sun4i_a10_sram_a3_a4.data,
+	},
+	{
+		.compatible	= "allwinner,sun4i-a10-sram-d",
+		.data		= &sun4i_a10_sram_d.data,
+	},
+	{}
+};
+
+static struct device *sram_dev;
+static LIST_HEAD(claimed_sram);
+static DEFINE_SPINLOCK(sram_lock);
+static void __iomem *base;
+
+static int sunxi_sram_show(struct seq_file *s, void *data)
+{
+	struct device_node *sram_node, *section_node;
+	const struct sunxi_sram_data *sram_data;
+	const struct of_device_id *match;
+	struct sunxi_sram_func *func;
+	const __be32 *sram_addr_p, *section_addr_p;
+	u32 val;
+
+	seq_puts(s, "Allwinner sunXi SRAM\n");
+	seq_puts(s, "--------------------\n\n");
+
+	for_each_child_of_node(sram_dev->of_node, sram_node) {
+		sram_addr_p = of_get_address(sram_node, 0, NULL, NULL);
+
+		seq_printf(s, "sram@%08x\n",
+			   be32_to_cpu(*sram_addr_p));
+
+		for_each_child_of_node(sram_node, section_node) {
+			match = of_match_node(sunxi_sram_dt_ids, section_node);
+			if (!match)
+				continue;
+			sram_data = match->data;
+
+			section_addr_p = of_get_address(section_node, 0,
+							NULL, NULL);
+
+			seq_printf(s, "\tsection@%04x\t(%s)\n",
+				   be32_to_cpu(*section_addr_p),
+				   sram_data->name);
+
+			val = readl(base + sram_data->reg);
+			val >>= sram_data->offset;
+			val &= sram_data->width;
+
+			for (func = sram_data->func; func->func; func++) {
+				seq_printf(s, "\t\t%s%c\n", func->func,
+					   func->val == val ? '*' : ' ');
+			}
+		}
+
+		seq_puts(s, "\n");
+	}
+
+	return 0;
+}
+
+static int sunxi_sram_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sunxi_sram_show, inode->i_private);
+}
+
+static const struct file_operations sunxi_sram_fops = {
+	.open = sunxi_sram_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static inline struct sunxi_sram_desc *to_sram_desc(const struct sunxi_sram_data *data)
+{
+	return container_of(data, struct sunxi_sram_desc, data);
+}
+
+static const struct sunxi_sram_data *sunxi_sram_of_parse(struct device_node *node,
+							 unsigned int *value)
+{
+	const struct of_device_id *match;
+	struct of_phandle_args args;
+	int ret;
+
+	ret = of_parse_phandle_with_fixed_args(node, "allwinner,sram", 1, 0,
+					       &args);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (!of_device_is_available(args.np)) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	if (value)
+		*value = args.args[0];
+
+	match = of_match_node(sunxi_sram_dt_ids, args.np);
+	if (!match) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	of_node_put(args.np);
+	return match->data;
+
+err:
+	of_node_put(args.np);
+	return ERR_PTR(ret);
+}
+
+int sunxi_sram_claim(struct device *dev)
+{
+	const struct sunxi_sram_data *sram_data;
+	struct sunxi_sram_desc *sram_desc;
+	unsigned int device;
+	u32 val, mask;
+
+	if (IS_ERR(base))
+		return -EPROBE_DEFER;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	sram_data = sunxi_sram_of_parse(dev->of_node, &device);
+	if (IS_ERR(sram_data))
+		return PTR_ERR(sram_data);
+
+	sram_desc = to_sram_desc(sram_data);
+
+	spin_lock(&sram_lock);
+
+	if (sram_desc->claimed) {
+		spin_unlock(&sram_lock);
+		return -EBUSY;
+	}
+
+	mask = GENMASK(sram_data->offset + sram_data->width, sram_data->offset);
+	val = readl(base + sram_data->reg);
+	val &= ~mask;
+	writel(val | ((device << sram_data->offset) & mask),
+	       base + sram_data->reg);
+
+	spin_unlock(&sram_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(sunxi_sram_claim);
+
+int sunxi_sram_release(struct device *dev)
+{
+	const struct sunxi_sram_data *sram_data;
+	struct sunxi_sram_desc *sram_desc;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	sram_data = sunxi_sram_of_parse(dev->of_node, NULL);
+	if (IS_ERR(sram_data))
+		return -EINVAL;
+
+	sram_desc = to_sram_desc(sram_data);
+
+	spin_lock(&sram_lock);
+	sram_desc->claimed = false;
+	spin_unlock(&sram_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(sunxi_sram_release);
+
+static int sunxi_sram_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct dentry *d;
+
+	sram_dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+
+	d = debugfs_create_file("sram", S_IRUGO, NULL, NULL,
+				&sunxi_sram_fops);
+	if (!d)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static const struct of_device_id sunxi_sram_dt_match[] = {
+	{ .compatible = "allwinner,sun4i-a10-sram-controller" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, sunxi_sram_dt_match);
+
+static struct platform_driver sunxi_sram_driver = {
+	.driver = {
+		.name		= "sunxi-sram",
+		.of_match_table	= sunxi_sram_dt_match,
+	},
+	.probe	= sunxi_sram_probe,
+};
+module_platform_driver(sunxi_sram_driver);
+
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_DESCRIPTION("Allwinner sunXi SRAM Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
index 3bf5aba4caaa..73fad05d8f2c 100644
--- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
+++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
@@ -28,8 +28,15 @@
 #define APBMISC_SIZE	0x64
 #define FUSE_SKU_INFO	0x10
 
+#define PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT	4
+#define PMC_STRAPPING_OPT_A_RAM_CODE_MASK_LONG	\
+	(0xf << PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT)
+#define PMC_STRAPPING_OPT_A_RAM_CODE_MASK_SHORT	\
+	(0x3 << PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT)
+
 static void __iomem *apbmisc_base;
 static void __iomem *strapping_base;
+static bool long_ram_code;
 
 u32 tegra_read_chipid(void)
 {
@@ -54,6 +61,18 @@ u32 tegra_read_straps(void)
 		return 0;
 }
 
+u32 tegra_read_ram_code(void)
+{
+	u32 straps = tegra_read_straps();
+
+	if (long_ram_code)
+		straps &= PMC_STRAPPING_OPT_A_RAM_CODE_MASK_LONG;
+	else
+		straps &= PMC_STRAPPING_OPT_A_RAM_CODE_MASK_SHORT;
+
+	return straps >> PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT;
+}
+
 static const struct of_device_id apbmisc_match[] __initconst = {
 	{ .compatible = "nvidia,tegra20-apbmisc", },
 	{},
@@ -112,4 +131,6 @@ void __init tegra_init_apbmisc(void)
 	strapping_base = of_iomap(np, 1);
 	if (!strapping_base)
 		pr_err("ioremap tegra strapping_base failed\n");
+
+	long_ram_code = of_property_read_bool(np, "nvidia,long-ram-code");
 }