Merge remote-tracking branch 'airlied/drm-next' into drm-intel-next-queued

Backmerge the 4.8 pull request state from Dave - conflicts were getting out of hand, and Chris has some patches which outright don't apply without everything merged together again. Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
author: Daniel Vetter <daniel.vetter@ffwll.ch> 2016-08-05 11:36:15 +0300
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2016-08-05 11:36:15 +0300
commit: 94558e265b9539b2ecec98d037bae51c902663c1 (patch)
tree: e2782bc903759c2a8bf0af9b3a8ec090a3b1bd3e /drivers
parent: 5ac9056753e79ac5ad1ccc3c99b311688e46e8c9 (diff)
parent: 1cf915d305b6e1d57db6c35c208016f9747ba3c6 (diff)
download: linux-94558e265b9539b2ecec98d037bae51c902663c1.tar.xz
952 files changed, 23741 insertions, 10028 deletions
diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c
index 1f4128487dd4..dee86925a9a1 100644
--- a/drivers/acpi/acpi_dbg.c
+++ b/drivers/acpi/acpi_dbg.c
@@ -602,7 +602,7 @@ static int acpi_aml_read_user(char __user *buf, int len)
 	crc->tail = (crc->tail + n) & (ACPI_AML_BUF_SIZE - 1);
 	ret = n;
 out:
-	acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, !ret);
+	acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, ret >= 0);
 	return ret;
 }
 
@@ -672,7 +672,7 @@ static int acpi_aml_write_user(const char __user *buf, int len)
 	crc->head = (crc->head + n) & (ACPI_AML_BUF_SIZE - 1);
 	ret = n;
 out:
-	acpi_aml_unlock_fifo(ACPI_AML_IN_USER, !ret);
+	acpi_aml_unlock_fifo(ACPI_AML_IN_USER, ret >= 0);
 	return n;
 }
 
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index daceb80022b0..3b7fb99362b6 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -306,12 +306,6 @@ acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
 acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 {
 	u64 address;
-	u8 access_width;
-	u32 bit_width;
-	u8 bit_offset;
-	u64 value64;
-	u32 new_value32, old_value32;
-	u8 index;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(hw_write);
@@ -323,145 +317,23 @@ acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 		return (status);
 	}
 
-	/* Convert access_width into number of bits based */
-
-	access_width = acpi_hw_get_access_bit_width(reg, 32);
-	bit_width = reg->bit_offset + reg->bit_width;
-	bit_offset = reg->bit_offset;
-
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	index = 0;
-	while (bit_width) {
-		/*
-		 * Use offset style bit reads because "Index * AccessWidth" is
-		 * ensured to be less than 32-bits by acpi_hw_validate_register().
-		 */
-		new_value32 = ACPI_GET_BITS(&value, index * access_width,
-					    ACPI_MASK_BITS_ABOVE_32
-					    (access_width));
-
-		if (bit_offset >= access_width) {
-			bit_offset -= access_width;
-		} else {
-			/*
-			 * Use offset style bit masks because access_width is ensured
-			 * to be less than 32-bits by acpi_hw_validate_register() and
-			 * bit_offset/bit_width is less than access_width here.
-			 */
-			if (bit_offset) {
-				new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
-			}
-			if (bit_width < access_width) {
-				new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
-			}
-
-			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_os_read_memory((acpi_physical_address)
-								address +
-								index *
-								ACPI_DIV_8
-								(access_width),
-								&value64,
-								access_width);
-					old_value32 = (u32)value64;
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				value64 = (u64)new_value32;
-				status =
-				    acpi_os_write_memory((acpi_physical_address)
-							 address +
-							 index *
-							 ACPI_DIV_8
-							 (access_width),
-							 value64, access_width);
-			} else {	/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
-
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_hw_read_port((acpi_io_address)
-							      address +
-							      index *
-							      ACPI_DIV_8
-							      (access_width),
-							      &old_value32,
-							      access_width);
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				status = acpi_hw_write_port((acpi_io_address)
-							    address +
-							    index *
-							    ACPI_DIV_8
-							    (access_width),
-							    new_value32,
-							    access_width);
-			}
-		}
-
-		/*
-		 * Index * access_width is ensured to be less than 32-bits by
-		 * acpi_hw_validate_register().
-		 */
-		bit_width -=
-		    bit_width > access_width ? access_width : bit_width;
-		index++;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, (u64)value,
+					      reg->bit_width);
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, value, reg->bit_width);
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
 			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
-			  value, access_width, ACPI_FORMAT_UINT64(address),
+			  value, reg->bit_width, ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 31e8da648fff..262ca31b86d9 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1051,7 +1051,7 @@ static int __init acpi_bus_init(void)
 	 * Maybe EC region is required at bus_scan/acpi_get_devices. So it
 	 * is necessary to enable it as early as possible.
 	 */
-	acpi_boot_ec_enable();
+	acpi_ec_dsdt_probe();
 
 	printk(KERN_INFO PREFIX "Interpreter enabled\n");
 
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 0e70181f150c..290d6f5be44b 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1331,8 +1331,6 @@ static int ec_install_handlers(struct acpi_ec *ec)
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
-	acpi_ec_stop(ec, false);
-
 	if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
 		if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
 					ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
@@ -1340,6 +1338,19 @@ static void ec_remove_handlers(struct acpi_ec *ec)
 		clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
 	}
 
+	/*
+	 * Stops handling the EC transactions after removing the operation
+	 * region handler. This is required because _REG(DISCONNECT)
+	 * invoked during the removal can result in new EC transactions.
+	 *
+	 * Flushes the EC requests and thus disables the GPE before
+	 * removing the GPE handler. This is required by the current ACPICA
+	 * GPE core. ACPICA GPE core will automatically disable a GPE when
+	 * it is indicated but there is no way to handle it. So the drivers
+	 * must disable the GPEs prior to removing the GPE handlers.
+	 */
+	acpi_ec_stop(ec, false);
+
 	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
 		if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
 					&acpi_ec_gpe_handler)))
@@ -1446,10 +1457,30 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context)
 	return AE_OK;
 }
 
-int __init acpi_boot_ec_enable(void)
+static const struct acpi_device_id ec_device_ids[] = {
+	{"PNP0C09", 0},
+	{"", 0},
+};
+
+int __init acpi_ec_dsdt_probe(void)
 {
-	if (!boot_ec)
+	acpi_status status;
+
+	if (boot_ec)
 		return 0;
+
+	/*
+	 * Finding EC from DSDT if there is no ECDT EC available. When this
+	 * function is invoked, ACPI tables have been fully loaded, we can
+	 * walk namespace now.
+	 */
+	boot_ec = make_acpi_ec();
+	if (!boot_ec)
+		return -ENOMEM;
+	status = acpi_get_devices(ec_device_ids[0].id,
+				  ec_parse_device, boot_ec, NULL);
+	if (ACPI_FAILURE(status) || !boot_ec->handle)
+		return -ENODEV;
 	if (!ec_install_handlers(boot_ec)) {
 		first_ec = boot_ec;
 		return 0;
@@ -1457,11 +1488,6 @@ int __init acpi_boot_ec_enable(void)
 	return -EFAULT;
 }
 
-static const struct acpi_device_id ec_device_ids[] = {
-	{"PNP0C09", 0},
-	{"", 0},
-};
-
 #if 0
 /*
  * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 9bb0773d39bf..27cc7feabfe4 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -181,7 +181,7 @@ typedef int (*acpi_ec_query_func) (void *data);
 
 int acpi_ec_init(void);
 int acpi_ec_ecdt_probe(void);
-int acpi_boot_ec_enable(void);
+int acpi_ec_dsdt_probe(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
 void acpi_ec_unblock_transactions_early(void);
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 2215fc847fa9..1f0e06065ae6 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
+	return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
 }
 static DEVICE_ATTR_RO(format);
 
@@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev,
 				continue;
 			if (nfit_dcr->dcr->code == dcr->code)
 				continue;
-			rc = sprintf(buf, "%#x\n",
-					be16_to_cpu(nfit_dcr->dcr->code));
+			rc = sprintf(buf, "0x%04x\n",
+					le16_to_cpu(nfit_dcr->dcr->code));
 			break;
 		}
 		if (rc != ENXIO)
@@ -1151,9 +1151,10 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		if (disable_vendor_specific)
 			dsm_mask &= ~(1 << 8);
 	} else {
-		dev_err(dev, "unknown dimm command family\n");
+		dev_dbg(dev, "unknown dimm command family\n");
 		nfit_mem->family = -1;
-		return force_enable_dimms ? 0 : -ENODEV;
+		/* DSMs are optional, continue loading the driver... */
+		return 0;
 	}
 
 	uuid = to_nfit_uuid(nfit_mem->family);
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 11cb38348aef..02b9ea1e8d2e 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -53,12 +53,12 @@ enum nfit_uuids {
 };
 
 /*
- * Region format interface codes are stored as an array of bytes in the
- * NFIT DIMM Control Region structure
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
  */
-#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
 
 enum {
 	NFIT_BLK_READ_FLUSH = 1,
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 8fc7323ed3e8..c983bf733ad3 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -470,6 +470,7 @@ static int acpi_irq_pci_sharing_penalty(int irq)
 {
 	struct acpi_pci_link *link;
 	int penalty = 0;
+	int i;
 
 	list_for_each_entry(link, &acpi_link_list, list) {
 		/*
@@ -478,18 +479,14 @@ static int acpi_irq_pci_sharing_penalty(int irq)
 		 */
 		if (link->irq.active && link->irq.active == irq)
 			penalty += PIRQ_PENALTY_PCI_USING;
-		else {
-			int i;
-
-			/*
-			 * If a link is inactive, penalize the IRQs it
-			 * might use, but not as severely.
-			 */
-			for (i = 0; i < link->irq.possible_count; i++)
-				if (link->irq.possible[i] == irq)
-					penalty += PIRQ_PENALTY_PCI_POSSIBLE /
-						link->irq.possible_count;
-		}
+
+		/*
+		 * penalize the IRQs PCI might use, but not as severely.
+		 */
+		for (i = 0; i < link->irq.possible_count; i++)
+			if (link->irq.possible[i] == irq)
+				penalty += PIRQ_PENALTY_PCI_POSSIBLE /
+					link->irq.possible_count;
 	}
 
 	return penalty;
@@ -499,9 +496,6 @@ static int acpi_irq_get_penalty(int irq)
 {
 	int penalty = 0;
 
-	if (irq < ACPI_MAX_ISA_IRQS)
-		penalty += acpi_isa_irq_penalty[irq];
-
 	/*
 	* Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict
 	* with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be
@@ -516,10 +510,49 @@ static int acpi_irq_get_penalty(int irq)
 			penalty += PIRQ_PENALTY_PCI_USING;
 	}
 
+	if (irq < ACPI_MAX_ISA_IRQS)
+		return penalty + acpi_isa_irq_penalty[irq];
+
 	penalty += acpi_irq_pci_sharing_penalty(irq);
 	return penalty;
 }
 
+int __init acpi_irq_penalty_init(void)
+{
+	struct acpi_pci_link *link;
+	int i;
+
+	/*
+	 * Update penalties to facilitate IRQ balancing.
+	 */
+	list_for_each_entry(link, &acpi_link_list, list) {
+
+		/*
+		 * reflect the possible and active irqs in the penalty table --
+		 * useful for breaking ties.
+		 */
+		if (link->irq.possible_count) {
+			int penalty =
+			    PIRQ_PENALTY_PCI_POSSIBLE /
+			    link->irq.possible_count;
+
+			for (i = 0; i < link->irq.possible_count; i++) {
+				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS)
+					acpi_isa_irq_penalty[link->irq.
+							 possible[i]] +=
+					    penalty;
+			}
+
+		} else if (link->irq.active &&
+				(link->irq.active < ACPI_MAX_ISA_IRQS)) {
+			acpi_isa_irq_penalty[link->irq.active] +=
+			    PIRQ_PENALTY_PCI_POSSIBLE;
+		}
+	}
+
+	return 0;
+}
+
 static int acpi_irq_balance = -1;	/* 0: static, 1: balance */
 
 static int acpi_pci_link_allocate(struct acpi_pci_link *link)
@@ -839,7 +872,7 @@ void acpi_penalize_isa_irq(int irq, int active)
 {
 	if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
 		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
-			active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING;
+		  (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
 bool acpi_isa_irq_available(int irq)
diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c
index 6e702ab57220..1d31c0c0fc20 100644
--- a/drivers/ata/ahci_seattle.c
+++ b/drivers/ata/ahci_seattle.c
@@ -137,7 +137,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info(
 	u32 val;
 
 	plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL);
-	if (IS_ERR(plat_data))
+	if (!plat_data)
 		return &ahci_port_info;
 
 	plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 6be7770f68e9..31c183aed368 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4314,6 +4314,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	 */
 	{ "ST380013AS",		"3.20",		ATA_HORKAGE_MAX_SEC_1024 },
 
+	/*
+	 * Device times out with higher max sects.
+	 * https://bugzilla.kernel.org/show_bug.cgi?id=121671
+	 */
+	{ "LITEON CX1-JB256-HP", NULL,		ATA_HORKAGE_MAX_SEC_1024 },
+
 	/* Devices we expect to fail diagnostics */
 
 	/* Devices where NCQ should be avoided */
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 61dc7a99e89a..c6f017458958 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	ata_scsi_port_error_handler(host, ap);
 
 	/* finish or retry handled scmd's and clean up */
-	WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+	WARN_ON(!list_empty(&eh_work_q));
 
 	DPRINTK("EXIT\n");
 }
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bd74ee555278..745489a1c86a 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -986,7 +986,7 @@ static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new)
 		 * Looks like a lot of fuss, but it avoids an unnecessary
 		 * +1 usec read-after-write delay for unaffected registers.
 		 */
-		laddr = (long)addr & 0xffff;
+		laddr = (unsigned long)addr & 0xffff;
 		if (laddr >= 0x300 && laddr <= 0x33c) {
 			laddr &= 0x000f;
 			if (laddr == 0x4 || laddr == 0xc) {
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 6b2a84e7f2be..2609ba20b396 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_ISA)	+= isa.o
+obj-$(CONFIG_ISA_BUS_API)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index 91dba65d7264..cd6ccdcf9df0 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -180,4 +180,4 @@ static int __init isa_bus_init(void)
 	return error;
 }
 
-device_initcall(isa_bus_init);
+postcore_initcall(isa_bus_init);
diff --git a/drivers/base/module.c b/drivers/base/module.c
index db930d3ee312..2a215780eda2 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv)
 
 static void module_create_drivers_dir(struct module_kobject *mk)
 {
-	if (!mk || mk->drivers_dir)
-		return;
+	static DEFINE_MUTEX(drivers_dir_mutex);
 
-	mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_lock(&drivers_dir_mutex);
+	if (mk && !mk->drivers_dir)
+		mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_unlock(&drivers_dir_mutex);
 }
 
 void module_add_driver(struct module *mod, struct device_driver *drv)
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 83d6e7ba1a34..8c3434bdb26d 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -211,7 +211,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
 		}
 
 		/* Mark opp-table as multiple CPUs are sharing it now */
-		opp_table->shared_opp = true;
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
 	}
 unlock:
 	mutex_unlock(&opp_table_lock);
@@ -227,7 +227,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
  *
  * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
  *
- * Returns -ENODEV if OPP table isn't already present.
+ * Returns -ENODEV if OPP table isn't already present and -EINVAL if the OPP
+ * table's status is access-unknown.
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -249,9 +250,14 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 		goto unlock;
 	}
 
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	cpumask_clear(cpumask);
 
-	if (opp_table->shared_opp) {
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
 		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
 			cpumask_set_cpu(opp_dev->dev->id, cpumask);
 	} else {
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 94d2010558e3..1dfd3dd92624 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -34,7 +34,10 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 			 * But the OPPs will be considered as shared only if the
 			 * OPP table contains a "opp-shared" property.
 			 */
-			return opp_table->shared_opp ? opp_table : NULL;
+			if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED)
+				return opp_table;
+
+			return NULL;
 		}
 	}
 
@@ -353,7 +356,10 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 	}
 
 	opp_table->np = opp_np;
-	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+	if (of_property_read_bool(opp_np, "opp-shared"))
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
+	else
+		opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
 
 	mutex_unlock(&opp_table_lock);
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 20f3be22e060..fabd5ca1a083 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -119,6 +119,12 @@ struct opp_device {
 #endif
 };
 
+enum opp_table_access {
+	OPP_TABLE_ACCESS_UNKNOWN = 0,
+	OPP_TABLE_ACCESS_EXCLUSIVE = 1,
+	OPP_TABLE_ACCESS_SHARED = 2,
+};
+
 /**
  * struct opp_table - Device opp structure
  * @node:	table node - contains the devices with OPPs that
@@ -166,7 +172,7 @@ struct opp_table {
 	/* For backward compatibility with v1 bindings */
 	unsigned int voltage_tolerance_v1;
 
-	bool shared_opp;
+	enum opp_table_access shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
 	unsigned int *supported_hw;
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index eda09090cb52..f642c4264c27 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -8,8 +8,6 @@
 #include <linux/bcma/bcma.h>
 #include <linux/delay.h>
 
-#define BCMA_CORE_SIZE		0x1000
-
 #define bcma_err(bus, fmt, ...) \
 	pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__)
 #define bcma_warn(bus, fmt, ...) \
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d597e432e195..ab19adb07a12 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1750,7 +1750,7 @@ aoecmd_init(void)
 	int ret;
 
 	/* get_zeroed_page returns page with ref count 1 */
-	p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	p = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 	empty_page = virt_to_page(p);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 31e73a7a40f2..6a48ed41963f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -941,7 +941,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
 	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
 	debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
 	debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
-	debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
 
 	return 0;
 }
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ca13df854639..fcc5b4e0aef2 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -207,6 +207,9 @@ struct blkfront_info
 	struct blk_mq_tag_set tag_set;
 	struct blkfront_ring_info *rinfo;
 	unsigned int nr_rings;
+	/* Save uncomplete reqs and bios for migration. */
+	struct list_head requests;
+	struct bio_list bio_list;
 };
 
 static unsigned int nr_minors;
@@ -874,8 +877,12 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
-	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
+	int qid = hctx->queue_num;
+	struct blkfront_info *info = hctx->queue->queuedata;
+	struct blkfront_ring_info *rinfo = NULL;
 
+	BUG_ON(info->nr_rings <= qid);
+	rinfo = &info->rinfo[qid];
 	blk_mq_start_request(qd->rq);
 	spin_lock_irqsave(&rinfo->ring_lock, flags);
 	if (RING_FULL(&rinfo->ring))
@@ -901,20 +908,9 @@ out_busy:
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			    unsigned int index)
-{
-	struct blkfront_info *info = (struct blkfront_info *)data;
-
-	BUG_ON(info->nr_rings <= index);
-	hctx->driver_data = &info->rinfo[index];
-	return 0;
-}
-
 static struct blk_mq_ops blkfront_mq_ops = {
 	.queue_rq = blkif_queue_rq,
 	.map_queue = blk_mq_map_queue,
-	.init_hctx = blk_mq_init_hctx,
 };
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -950,6 +946,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 		return PTR_ERR(rq);
 	}
 
+	rq->queuedata = info;
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	if (info->feature_discard) {
@@ -2008,69 +2005,22 @@ static int blkif_recover(struct blkfront_info *info)
 {
 	unsigned int i, r_index;
 	struct request *req, *n;
-	struct blk_shadow *copy;
 	int rc;
 	struct bio *bio, *cloned_bio;
-	struct bio_list bio_list, merge_bio;
 	unsigned int segs, offset;
 	int pending, size;
 	struct split_bio *split_bio;
-	struct list_head requests;
 
 	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
 	blk_queue_max_segments(info->rq, segs);
-	bio_list_init(&bio_list);
-	INIT_LIST_HEAD(&requests);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
-		struct blkfront_ring_info *rinfo;
-
-		rinfo = &info->rinfo[r_index];
-		/* Stage 1: Make a safe copy of the shadow state. */
-		copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
-			       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-		if (!copy)
-			return -ENOMEM;
-
-		/* Stage 2: Set up free list. */
-		memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
-		for (i = 0; i < BLK_RING_SIZE(info); i++)
-			rinfo->shadow[i].req.u.rw.id = i+1;
-		rinfo->shadow_free = rinfo->ring.req_prod_pvt;
-		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
 
 		rc = blkfront_setup_indirect(rinfo);
-		if (rc) {
-			kfree(copy);
+		if (rc)
 			return rc;
-		}
-
-		for (i = 0; i < BLK_RING_SIZE(info); i++) {
-			/* Not in use? */
-			if (!copy[i].request)
-				continue;
-
-			/*
-			 * Get the bios in the request so we can re-queue them.
-			 */
-			if (copy[i].request->cmd_flags &
-			    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
-				/*
-				 * Flush operations don't contain bios, so
-				 * we need to requeue the whole request
-				 */
-				list_add(&copy[i].request->queuelist, &requests);
-				continue;
-			}
-			merge_bio.head = copy[i].request->bio;
-			merge_bio.tail = copy[i].request->biotail;
-			bio_list_merge(&bio_list, &merge_bio);
-			copy[i].request->bio = NULL;
-			blk_end_request_all(copy[i].request, 0);
-		}
-
-		kfree(copy);
 	}
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
@@ -2085,7 +2035,7 @@ static int blkif_recover(struct blkfront_info *info)
 		kick_pending_request_queues(rinfo);
 	}
 
-	list_for_each_entry_safe(req, n, &requests, queuelist) {
+	list_for_each_entry_safe(req, n, &info->requests, queuelist) {
 		/* Requeue pending requests (flush or discard) */
 		list_del_init(&req->queuelist);
 		BUG_ON(req->nr_phys_segments > segs);
@@ -2093,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info)
 	}
 	blk_mq_kick_requeue_list(info->rq);
 
-	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
 		if (bio_segments(bio) > segs) {
 			/*
@@ -2139,9 +2089,42 @@ static int blkfront_resume(struct xenbus_device *dev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	int err = 0;
+	unsigned int i, j;
 
 	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
+	bio_list_init(&info->bio_list);
+	INIT_LIST_HEAD(&info->requests);
+	for (i = 0; i < info->nr_rings; i++) {
+		struct blkfront_ring_info *rinfo = &info->rinfo[i];
+		struct bio_list merge_bio;
+		struct blk_shadow *shadow = rinfo->shadow;
+
+		for (j = 0; j < BLK_RING_SIZE(info); j++) {
+			/* Not in use? */
+			if (!shadow[j].request)
+				continue;
+
+			/*
+			 * Get the bios in the request so we can re-queue them.
+			 */
+			if (shadow[j].request->cmd_flags &
+					(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+				/*
+				 * Flush operations don't contain bios, so
+				 * we need to requeue the whole request
+				 */
+				list_add(&shadow[j].request->queuelist, &info->requests);
+				continue;
+			}
+			merge_bio.head = shadow[j].request->bio;
+			merge_bio.tail = shadow[j].request->biotail;
+			bio_list_merge(&info->bio_list, &merge_bio);
+			shadow[j].request->bio = NULL;
+			blk_mq_end_request(shadow[j].request, 0);
+		}
+	}
+
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = negotiate_mq(info);
@@ -2149,6 +2132,8 @@ static int blkfront_resume(struct xenbus_device *dev)
 		return err;
 
 	err = talk_to_blkback(dev, info);
+	if (!err)
+		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
 
 	/*
 	 * We have to wait for the backend to switch to
@@ -2485,10 +2470,23 @@ static void blkback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateConnected:
-		if (dev->state != XenbusStateInitialised) {
+		/*
+		 * talk_to_blkback sets state to XenbusStateInitialised
+		 * and blkfront_connect sets it to XenbusStateConnected
+		 * (if connection went OK).
+		 *
+		 * If the backend (or toolstack) decides to poke at backend
+		 * state (and re-trigger the watch by setting the state repeatedly
+		 * to XenbusStateConnected (4)) we need to deal with this.
+		 * This is allowed as this is used to communicate to the guest
+		 * that the size of disk has changed!
+		 */
+		if ((dev->state != XenbusStateInitialised) &&
+		    (dev->state != XenbusStateConnected)) {
 			if (talk_to_blkback(dev, info))
 				break;
 		}
+
 		blkfront_connect(info);
 		break;
 
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 94fb407d8561..44b1bd6baa38 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3820,6 +3820,7 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 	while (!list_empty(&intf->waiting_rcv_msgs)) {
 		smi_msg = list_entry(intf->waiting_rcv_msgs.next,
 				     struct ipmi_smi_msg, link);
+		list_del(&smi_msg->link);
 		if (!run_to_completion)
 			spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
 					       flags);
@@ -3829,11 +3830,14 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 		if (rv > 0) {
 			/*
 			 * To preserve message order, quit if we
-			 * can't handle a message.
+			 * can't handle a message.  Add the message
+			 * back at the head, this is safe because this
+			 * tasklet is the only thing that pulls the
+			 * messages.
 			 */
+			list_add(&smi_msg->link, &intf->waiting_rcv_msgs);
 			break;
 		} else {
-			list_del(&smi_msg->link);
 			if (rv == 0)
 				/* Message handled */
 				ipmi_free_smi_msg(smi_msg);
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 53ddba26578c..98efbfcdb503 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -175,6 +175,7 @@ config COMMON_CLK_KEYSTONE
 config COMMON_CLK_NXP
 	def_bool COMMON_CLK && (ARCH_LPC18XX || ARCH_LPC32XX)
 	select REGMAP_MMIO if ARCH_LPC32XX
+	select MFD_SYSCON if ARCH_LPC18XX
 	---help---
 	  Support for clock providers on NXP platforms.
 
diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 10f846cc8db1..25d5906640c3 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c
@@ -99,7 +99,7 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index)
 	struct clk_programmable *prog = to_clk_programmable(hw);
 	const struct clk_programmable_layout *layout = prog->layout;
 	unsigned int mask = layout->css_mask;
-	unsigned int pckr = 0;
+	unsigned int pckr = index;
 
 	if (layout->have_slck_mck)
 		mask |= AT91_PMC_CSSMCK_MCK;
diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c
index efba7d4dbcfc..79bcb2e42060 100644
--- a/drivers/clk/clk-oxnas.c
+++ b/drivers/clk/clk-oxnas.c
@@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	regmap = syscon_node_to_regmap(of_get_parent(np));
-	if (!regmap) {
+	if (IS_ERR(regmap)) {
 		dev_err(&pdev->dev, "failed to have parent regmap\n");
-		return -EINVAL;
+		return PTR_ERR(regmap);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) {
diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c
index 020a29acc5b0..51f54380474b 100644
--- a/drivers/clk/microchip/clk-pic32mzda.c
+++ b/drivers/clk/microchip/clk-pic32mzda.c
@@ -180,15 +180,15 @@ static int pic32mzda_clk_probe(struct platform_device *pdev)
 
 	/* register fixed rate clocks */
 	clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL,
-						CLK_IS_ROOT, 24000000);
+						0, 24000000);
 	clks[FRCCLK] =  clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL,
-						CLK_IS_ROOT, 8000000);
+						0, 8000000);
 	clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL,
-						CLK_IS_ROOT, 8000000);
+						0, 8000000);
 	clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL,
-						CLK_IS_ROOT, 32000);
+						0, 32000);
 	clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL,
-						CLK_IS_ROOT, 24000000);
+						0, 24000000);
 	/* fixed rate (optional) clock */
 	if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) {
 		pr_info("pic32-clk: dt requests SOSC.\n");
diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index 4bb130cd0062..05b3d73bfefa 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c
@@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name,
 	}
 
 	cclk = clk_register(NULL, &cpuclk->hw);
-	if (IS_ERR(clk)) {
+	if (IS_ERR(cclk)) {
 		pr_err("%s: could not register cpuclk %s\n", __func__,	name);
-		ret = PTR_ERR(clk);
+		ret = PTR_ERR(cclk);
 		goto free_rate_table;
 	}
 
diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index bc856f21f6b2..077fcdc7908b 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw,
 #define ROCKCHIP_MMC_DEGREE_MASK 0x3
 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2
 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET)
-#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1
-#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1
 
 #define PSECS_PER_SEC 1000000000000LL
 
@@ -154,6 +152,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
+	init.flags = 0;
 	init.num_parents = num_parents;
 	init.parent_names = parent_names;
 	init.ops = &rockchip_mmc_clk_ops;
@@ -162,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 	mmc_clock->reg = reg;
 	mmc_clock->shift = shift;
 
-	/*
-	 * Assert init_state to soft reset the CLKGEN
-	 * for mmc tuning phase and degree
-	 */
-	if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT)
-		writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET,
-				     ROCKCHIP_MMC_INIT_STATE_RESET,
-				     mmc_clock->shift), mmc_clock->reg);
-
 	clk = clk_register(NULL, &mmc_clock->hw);
 	if (IS_ERR(clk))
 		kfree(mmc_clock);
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 291543f52caa..8059a8d3ea36 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 			RK3399_CLKGATE_CON(13), 1, GFLAGS),
 
 	/* perihp */
-	GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
+	GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 0, GFLAGS),
-	GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
+	GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 1, GFLAGS),
 	COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS,
@@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
 
 static const char *const rk3399_cru_critical_clocks[] __initconst = {
 	"aclk_cci_pre",
+	"aclk_gic",
+	"aclk_gic_noc",
 	"pclk_perilp0",
 	"pclk_perilp0",
 	"hclk_perilp0",
@@ -1508,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
@@ -1553,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip pmu clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
diff --git a/drivers/clk/sunxi/clk-sun4i-display.c b/drivers/clk/sunxi/clk-sun4i-display.c
index 445a7498d6df..9780fac6d029 100644
--- a/drivers/clk/sunxi/clk-sun4i-display.c
+++ b/drivers/clk/sunxi/clk-sun4i-display.c
@@ -33,6 +33,8 @@ struct sun4i_a10_display_clk_data {
 
 	u8	width_div;
 	u8	width_mux;
+
+	u32	flags;
 };
 
 struct reset_data {
@@ -166,7 +168,7 @@ static void __init sun4i_a10_display_init(struct device_node *node,
 				     data->has_div ? &div->hw : NULL,
 				     data->has_div ? &clk_divider_ops : NULL,
 				     &gate->hw, &clk_gate_ops,
-				     0);
+				     data->flags);
 	if (IS_ERR(clk)) {
 		pr_err("%s: Couldn't register the clock\n", clk_name);
 		goto free_div;
@@ -232,6 +234,7 @@ static const struct sun4i_a10_display_clk_data sun4i_a10_tcon_ch0_data __initcon
 	.offset_rst	= 29,
 	.offset_mux	= 24,
 	.width_mux	= 2,
+	.flags		= CLK_SET_RATE_PARENT,
 };
 
 static void __init sun4i_a10_tcon_ch0_setup(struct device_node *node)
diff --git a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
index 98a4582de56a..b6d29d1bedca 100644
--- a/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
+++ b/drivers/clk/sunxi/clk-sun4i-tcon-ch1.c
@@ -79,15 +79,11 @@ static int tcon_ch1_is_enabled(struct clk_hw *hw)
 static u8 tcon_ch1_get_parent(struct clk_hw *hw)
 {
 	struct tcon_ch1_clk *tclk = hw_to_tclk(hw);
-	int num_parents = clk_hw_get_num_parents(hw);
 	u32 reg;
 
 	reg = readl(tclk->reg) >> TCON_CH1_SCLK2_MUX_SHIFT;
 	reg &= reg >> TCON_CH1_SCLK2_MUX_MASK;
 
-	if (reg >= num_parents)
-		return -EINVAL;
-
 	return reg;
 }
 
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 15d06fcf0b50..b02f9c606e0b 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -56,11 +56,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
 /* proc_event_counts is used as the sequence number of the netlink message */
 static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
 
-static inline void get_seq(__u32 *ts, int *cpu)
+static inline void send_msg(struct cn_msg *msg)
 {
 	preempt_disable();
-	*ts = __this_cpu_inc_return(proc_event_counts) - 1;
-	*cpu = smp_processor_id();
+
+	msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
+	((struct proc_event *)msg->data)->cpu = smp_processor_id();
+
+	/*
+	 * Preemption remains disabled during send to ensure the messages are
+	 * ordered according to their sequence numbers.
+	 *
+	 * If cn_netlink_send() fails, the data is not sent.
+	 */
+	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
+
 	preempt_enable();
 }
 
@@ -77,7 +87,6 @@ void proc_fork_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_FORK;
 	rcu_read_lock();
@@ -92,8 +101,7 @@ void proc_fork_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	/*  If cn_netlink_send() failed, the data is not sent */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exec_connector(struct task_struct *task)
@@ -108,7 +116,6 @@ void proc_exec_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXEC;
 	ev->event_data.exec.process_pid = task->pid;
@@ -118,7 +125,7 @@ void proc_exec_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_id_connector(struct task_struct *task, int which_id)
@@ -150,14 +157,13 @@ void proc_id_connector(struct task_struct *task, int which_id)
 		return;
 	}
 	rcu_read_unlock();
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_sid_connector(struct task_struct *task)
@@ -172,7 +178,6 @@ void proc_sid_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_SID;
 	ev->event_data.sid.process_pid = task->pid;
@@ -182,7 +187,7 @@ void proc_sid_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
@@ -197,7 +202,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_PTRACE;
 	ev->event_data.ptrace.process_pid  = task->pid;
@@ -215,7 +219,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_comm_connector(struct task_struct *task)
@@ -230,7 +234,6 @@ void proc_comm_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COMM;
 	ev->event_data.comm.process_pid  = task->pid;
@@ -241,7 +244,7 @@ void proc_comm_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_coredump_connector(struct task_struct *task)
@@ -256,7 +259,6 @@ void proc_coredump_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COREDUMP;
 	ev->event_data.coredump.process_pid = task->pid;
@@ -266,7 +268,7 @@ void proc_coredump_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exit_connector(struct task_struct *task)
@@ -281,7 +283,6 @@ void proc_exit_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXIT;
 	ev->event_data.exit.process_pid = task->pid;
@@ -293,7 +294,7 @@ void proc_exit_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /*
@@ -325,7 +326,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
 	msg->ack = rcvd_ack + 1;
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /**
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3646b143bbf5..0bb44d5b5df4 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = {
 static int __init cpufreq_dt_platdev_init(void)
 {
 	struct device_node *np = of_find_node_by_path("/");
+	const struct of_device_id *match;
 
 	if (!np)
 		return -ENODEV;
 
-	if (!of_match_node(machines, np))
+	match = of_match_node(machines, np);
+	of_node_put(np);
+	if (!match)
 		return -ENODEV;
 
-	of_node_put(of_root);
-
 	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
 							       NULL, 0));
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 9009295f5134..5617c7087d77 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2261,6 +2261,10 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
+		if (cpufreq_suspended) {
+			ret = -EAGAIN;
+			goto unlock;
+		}
 		new_policy.cur = cpufreq_update_current_freq(policy);
 		if (WARN_ON(!new_policy.cur)) {
 			ret = -EIO;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0d159b513469..1fa1a32928d7 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -372,26 +372,9 @@ static bool intel_pstate_get_ppc_enable_status(void)
 	return acpi_ppc;
 }
 
-/*
- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
- * target ratio 0x17. The _PSS control value stores in a format which can be
- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
- * This function converts the _PSS control value to intel pstate driver format
- * for comparison and assignment.
- */
-static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-{
-	return cpu->acpi_perf_data.states[index].control >> 8;
-}
-
 static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	int turbo_pss_ctl;
 	int ret;
 	int i;
 
@@ -441,11 +424,10 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	 * max frequency, which will cause a reduced performance as
 	 * this driver uses real max turbo frequency as the max
 	 * frequency. So correct this frequency in _PSS table to
-	 * correct max turbo frequency based on the turbo ratio.
+	 * correct max turbo frequency based on the turbo state.
 	 * Also need to convert to MHz as _PSS freq is in MHz.
 	 */
-	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-	if (turbo_pss_ctl > cpu->pstate.max_pstate)
+	if (!limits->turbo_disabled)
 		cpu->acpi_perf_data.states[0].core_frequency =
 					policy->cpuinfo.max_freq / 1000;
 	cpu->valid_pss_table = true;
@@ -1418,6 +1400,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
+	if (cpu->update_util_set)
+		return;
+
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
@@ -1458,7 +1443,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
-	intel_pstate_clear_update_util_hook(policy->cpu);
+	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
+		 policy->cpuinfo.max_freq, policy->max);
 
 	cpu = all_cpu_data[0];
 	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
@@ -1495,13 +1481,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 				   limits->max_sysfs_pct);
 	limits->max_perf_pct = max(limits->min_policy_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
 	limits->min_perf = div_fp(limits->min_perf_pct, 100);
 	limits->max_perf = div_fp(limits->max_perf_pct, 100);
+	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
  out:
 	intel_pstate_set_update_util_hook(policy->cpu);
@@ -1558,8 +1544,11 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	/* cpuinfo and default policy values */
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
-	policy->cpuinfo.max_freq =
-		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	update_turbo_state();
+	policy->cpuinfo.max_freq = limits->turbo_disabled ?
+			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+
 	intel_pstate_init_acpi_perf_limits(policy);
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 808a320e9d5d..a7ecb9a84c15 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.space_id = reg_resource->space_id;
 	doorbell.bit_width = reg_resource->bit_width;
 	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
+	doorbell.access_width = 4;
 	doorbell.address = reg_resource->address;
 
 	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index a4d0059e232c..c73207abb5a4 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
 	struct cpuidle_state *target_state = &drv->states[index];
 	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
-	u64 time_start, time_end;
+	ktime_t time_start, time_end;
 	s64 diff;
 
 	/*
@@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	sched_idle_set_state(target_state);
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);
-	time_start = local_clock();
+	time_start = ns_to_ktime(local_clock());
 
 	stop_critical_timings();
 	entered_state = target_state->enter(dev, drv, index);
 	start_critical_timings();
 
-	time_end = local_clock();
+	time_end = ns_to_ktime(local_clock());
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
 	/* The cpu is no longer idle or about to enter idle. */
@@ -217,11 +217,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	if (!cpuidle_state_is_coupled(drv, index))
 		local_irq_enable();
 
-	/*
-	 * local_clock() returns the time in nanosecond, let's shift
-	 * by 10 (divide by 1024) to have microsecond based time.
-	 */
-	diff = (time_end - time_start) >> 10;
+	diff = ktime_us_delta(time_end, time_start);
 	if (diff > INT_MAX)
 		diff = INT_MAX;
 
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 6d74b91f2152..5fc3dbb9ada0 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -2,6 +2,7 @@ $(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \
 			     $(obj)/qat_rsapubkey-asn1.h
 $(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \
 			      $(obj)/qat_rsaprivkey-asn1.h
+$(obj)/qat_asym_algs.o: $(obj)/qat_rsapubkey-asn1.h $(obj)/qat_rsaprivkey-asn1.h
 
 clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h
 clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 574e87c7f2b8..9acccad26928 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 						&device_data->state);
 				memmove(req_ctx->state.buffer,
 					device_data->state.buffer,
-					HASH_BLOCK_SIZE / sizeof(u32));
+					HASH_BLOCK_SIZE);
 				if (ret) {
 					dev_err(device_data->dev,
 						"%s: hash_resume_state() failed!\n",
@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 
 			memmove(device_data->state.buffer,
 				req_ctx->state.buffer,
-				HASH_BLOCK_SIZE / sizeof(u32));
+				HASH_BLOCK_SIZE);
 			if (ret) {
 				dev_err(device_data->dev, "%s: hash_save_state() failed!\n",
 					__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 495577b6d31b..94ad5c0adbcb 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
 	.cra_name = "cbc(aes)",
 	.cra_driver_name = "p8_aes_cbc",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 0a3c1b04cf3c..38ed10d761d0 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
 	.cra_name = "ctr(aes)",
 	.cra_driver_name = "p8_aes_ctr",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
index 9f4994cabcc7..b18e67d0e065 100644
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -141,7 +141,7 @@ my $vmr = sub {
 
 # Some ABIs specify vrsave, special-purpose register #256, as reserved
 # for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $no_vrsave = ($flavour =~ /linux-ppc64le/);
 my $mtspr = sub {
     my ($f,$idx,$ra) = @_;
     if ($idx == 256 && $no_vrsave) {
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1d6c803804d5..e92418facc92 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq)
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
 
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
-	if (err)
+	if (err) {
+		freqs.new = cur_freq;
+		devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
 		return err;
+	}
 
 	freqs.new = freq;
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
@@ -552,6 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
+	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
@@ -561,23 +565,22 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		mutex_lock(&devfreq->lock);
 	}
 
-	devfreq->trans_table =	devm_kzalloc(dev, sizeof(unsigned int) *
-						devfreq->profile->max_state *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->last_stat_updated = jiffies;
-
 	dev_set_name(&devfreq->dev, "%s", dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
-		put_device(&devfreq->dev);
 		mutex_unlock(&devfreq->lock);
 		goto err_out;
 	}
 
+	devfreq->trans_table =	devm_kzalloc(&devfreq->dev, sizeof(unsigned int) *
+						devfreq->profile->max_state *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->time_in_state = devm_kzalloc(&devfreq->dev, sizeof(unsigned long) *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->last_stat_updated = jiffies;
+
 	srcu_init_notifier_head(&devfreq->transition_notifier_list);
 
 	mutex_unlock(&devfreq->lock);
@@ -603,7 +606,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 err_init:
 	list_del(&devfreq->node);
 	device_unregister(&devfreq->dev);
-	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
@@ -621,7 +623,6 @@ int devfreq_remove_device(struct devfreq *devfreq)
 		return -EINVAL;
 
 	device_unregister(&devfreq->dev);
-	put_device(&devfreq->dev);
 
 	return 0;
 }
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 6b6a5f310486..a5841403bde8 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev,
 
 	/* Maps the memory mapped IO to control nocp register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
 	base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index 9824bc4addf8..25bcfa0b474f 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -1,11 +1,20 @@
 menu "DMABUF options"
 
 config SYNC_FILE
-	bool "sync_file support for fences"
+	bool "Explicit Synchronization Framework"
 	default n
 	select ANON_INODES
 	select DMA_SHARED_BUFFER
 	---help---
-	  This option enables the fence framework synchronization to export
-	  sync_files to userspace that can represent one or more fences.
+	  The Sync File Framework adds explicit syncronization via
+	  userspace. It enables send/receive 'struct fence' objects to/from
+	  userspace via Sync File fds for synchronization between drivers via
+	  userspace components. It has been ported from Android.
+
+	  The first and main user for this is graphics in which a fence is
+	  associated with a buffer. When a job is submitted to the GPU a fence
+	  is attached to the buffer and is transferred via userspace, using Sync
+	  Files fds, to the DRM driver for example. More details at
+	  Documentation/sync_file.txt.
+
 endmenu
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 20ce0687b111..ddaee60ae52a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -334,6 +334,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	struct reservation_object *resv = exp_info->resv;
 	struct file *file;
 	size_t alloc_size = sizeof(struct dma_buf);
+	int ret;
 
 	if (!exp_info->resv)
 		alloc_size += sizeof(struct reservation_object);
@@ -357,8 +358,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 
 	dmabuf = kzalloc(alloc_size, GFP_KERNEL);
 	if (!dmabuf) {
-		module_put(exp_info->owner);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto err_module;
 	}
 
 	dmabuf->priv = exp_info->priv;
@@ -379,8 +380,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
 					exp_info->flags);
 	if (IS_ERR(file)) {
-		kfree(dmabuf);
-		return ERR_CAST(file);
+		ret = PTR_ERR(file);
+		goto err_dmabuf;
 	}
 
 	file->f_mode |= FMODE_LSEEK;
@@ -394,6 +395,12 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	mutex_unlock(&db_list.lock);
 
 	return dmabuf;
+
+err_dmabuf:
+	kfree(dmabuf);
+err_module:
+	module_put(exp_info->owner);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dma_buf_export);
 
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 8e304b1befc5..75bd6621dc5d 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -242,7 +242,7 @@ struct at_xdmac_lld {
 	u32		mbr_dus;	/* Destination Microblock Stride Register */
 };
 
-
+/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
 struct at_xdmac_desc {
 	struct at_xdmac_lld		lld;
 	enum dma_transfer_direction	direction;
@@ -253,7 +253,7 @@ struct at_xdmac_desc {
 	unsigned int			xfer_size;
 	struct list_head		descs_list;
 	struct list_head		xfer_node;
-};
+} __aligned(sizeof(u64));
 
 static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
 {
@@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	u32			cur_nda, check_nda, cur_ubc, mask, value;
 	u8			dwidth = 0;
 	unsigned long		flags;
+	bool			initd;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_COMPLETE)
@@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	residue = desc->xfer_size;
 	/*
 	 * Flush FIFO: only relevant when the transfer is source peripheral
-	 * synchronized.
+	 * synchronized. Flush is needed before reading CUBC because data in
+	 * the FIFO are not reported by CUBC. Reporting a residue of the
+	 * transfer length while we have data in FIFO can cause issue.
+	 * Usecase: atmel USART has a timeout which means I have received
+	 * characters but there is no more character received for a while. On
+	 * timeout, it requests the residue. If the data are in the DMA FIFO,
+	 * we will return a residue of the transfer length. It means no data
+	 * received. If an application is waiting for these data, it will hang
+	 * since we won't have another USART timeout without receiving new
+	 * data.
 	 */
 	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
 	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
@@ -1435,34 +1445,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
-	 * When processing the residue, we need to read two registers but we
-	 * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
-	 * we stand in the descriptor list and AT_XDMAC_CUBC is used
-	 * to know how many data are remaining for the current descriptor.
-	 * Since the dma channel is not paused to not loose data, between the
-	 * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
-	 * descriptor.
-	 * For that reason, after reading AT_XDMAC_CUBC, we check if we are
-	 * still using the same descriptor by reading a second time
-	 * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
-	 * read again AT_XDMAC_CUBC.
+	 * The easiest way to compute the residue should be to pause the DMA
+	 * but doing this can lead to miss some data as some devices don't
+	 * have FIFO.
+	 * We need to read several registers because:
+	 * - DMA is running therefore a descriptor change is possible while
+	 * reading these registers
+	 * - When the block transfer is done, the value of the CUBC register
+	 * is set to its initial value until the fetch of the next descriptor.
+	 * This value will corrupt the residue calculation so we have to skip
+	 * it.
+	 *
+	 * INITD --------                    ------------
+	 *              |____________________|
+	 *       _______________________  _______________
+	 * NDA       @desc2             \/   @desc3
+	 *       _______________________/\_______________
+	 *       __________  ___________  _______________
+	 * CUBC       0    \/ MAX desc1 \/  MAX desc2
+	 *       __________/\___________/\_______________
+	 *
+	 * Since descriptors are aligned on 64 bits, we can assume that
+	 * the update of NDA and CUBC is atomic.
 	 * Memory barriers are used to ensure the read order of the registers.
-	 * A max number of retries is set because unlikely it can never ends if
-	 * we are transferring a lot of data with small buffers.
+	 * A max number of retries is set because unlikely it could never ends.
 	 */
-	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-	rmb();
-	cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
 	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
-		rmb();
 		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-
-		if (likely(cur_nda == check_nda))
-			break;
-
-		cur_nda = check_nda;
+		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
 		rmb();
 		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+		rmb();
+		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+		rmb();
+
+		if ((check_nda == cur_nda) && initd)
+			break;
 	}
 
 	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
@@ -1471,6 +1490,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized. Another flush is needed here because CUBC is updated
+	 * when the controller sends the data write command. It can lead to
+	 * report data that are not written in the memory or the device. The
+	 * FIFO flush ensures that data are really written.
+	 */
+	if ((desc->lld.mbr_cfg & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	/*
 	 * Remove size of all microblocks already transferred and the current
 	 * one. Then add the remaining size to transfer of the current
 	 * microblock.
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 25d1dadcddd1..d0446a75990a 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -703,8 +703,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 		goto free_resources;
 	}
 
-	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0,
-				 PAGE_SIZE, DMA_TO_DEVICE);
+	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
+			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
+			       DMA_TO_DEVICE);
 	unmap->addr[0] = src_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
@@ -714,8 +715,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	}
 	unmap->to_cnt = 1;
 
-	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0,
-				  PAGE_SIZE, DMA_FROM_DEVICE);
+	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
+				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
+				DMA_FROM_DEVICE);
 	unmap->addr[1] = dest_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 6aa256b0a1ed..c3ee3ad98a63 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value)
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
-		edac_mod_work(&mci->work, value);
+		if (mci->op_state == OP_RUNNING_POLL)
+			edac_mod_work(&mci->work, value);
 	}
 	mutex_unlock(&mem_ctls_mutex);
 }
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index b4d0bf6534cf..4fb2eb7c800d 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
 	{ 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
 };
 
-#define RIR_RNK_TGT(reg)		GET_BITFIELD(reg, 16, 19)
-#define RIR_OFFSET(reg)		GET_BITFIELD(reg,  2, 14)
+#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \
+	GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19))
+
+#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \
+	GET_BITFIELD(reg,  2, 15) : GET_BITFIELD(reg,  2, 14))
 
 /* Device 16, functions 2-7 */
 
@@ -326,6 +329,7 @@ struct pci_id_descr {
 struct pci_id_table {
 	const struct pci_id_descr	*descr;
 	int				n_devs;
+	enum type			type;
 };
 
 struct sbridge_dev {
@@ -394,9 +398,14 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = {
 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)		},
 };
 
-#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
+#define PCI_ID_TABLE_ENTRY(A, T) {	\
+	.descr = A,			\
+	.n_devs = ARRAY_SIZE(A),	\
+	.type = T			\
+}
+
 static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -463,7 +472,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -536,7 +545,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_haswell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -580,7 +589,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_knl_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
 	{0,}
 };
 
@@ -648,7 +657,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -1894,14 +1903,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 				pci_read_config_dword(pvt->pci_tad[i],
 						      rir_offset[j][k],
 						      &reg);
-				tmp_mb = RIR_OFFSET(reg) << 6;
+				tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6;
 
 				gb = div_u64_rem(tmp_mb, 1024, &mb);
 				edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
 					 i, j, k,
 					 gb, (mb*1000)/1024,
 					 ((u64)tmp_mb) << 20L,
-					 (u32)RIR_RNK_TGT(reg),
+					 (u32)RIR_RNK_TGT(pvt->info.type, reg),
 					 reg);
 			}
 		}
@@ -2234,7 +2243,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
 			      rir_offset[n_rir][idx],
 			      &reg);
-	*rank = RIR_RNK_TGT(reg);
+	*rank = RIR_RNK_TGT(pvt->info.type, reg);
 
 	edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
 		 n_rir,
@@ -2369,22 +2378,19 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
  * @num_mc: pointer to the memory controllers count, to be incremented in case
  *	    of success.
  * @table: model specific table
- * @allow_dups: allow for multiple devices to exist with the same device id
- *              (as implemented, this isn't expected to work correctly in the
- *              multi-socket case).
- * @multi_bus: don't assume devices on different buses belong to different
- *             memory controllers.
  *
  * returns 0 in case of success or error code
  */
-static int sbridge_get_all_devices_full(u8 *num_mc,
-					const struct pci_id_table *table,
-					int allow_dups,
-					int multi_bus)
+static int sbridge_get_all_devices(u8 *num_mc,
+					const struct pci_id_table *table)
 {
 	int i, rc;
 	struct pci_dev *pdev = NULL;
+	int allow_dups = 0;
+	int multi_bus = 0;
 
+	if (table->type == KNIGHTS_LANDING)
+		allow_dups = multi_bus = 1;
 	while (table && table->descr) {
 		for (i = 0; i < table->n_devs; i++) {
 			if (!allow_dups || i == 0 ||
@@ -2411,11 +2417,6 @@ static int sbridge_get_all_devices_full(u8 *num_mc,
 	return 0;
 }
 
-#define sbridge_get_all_devices(num_mc, table) \
-		sbridge_get_all_devices_full(num_mc, table, 0, 0)
-#define sbridge_get_all_devices_knl(num_mc, table) \
-		sbridge_get_all_devices_full(num_mc, table, 1, 1)
-
 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
 				 struct sbridge_dev *sbridge_dev)
 {
@@ -3357,12 +3358,12 @@ fail0:
 #define ICPU(model, table) \
 	{ X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
 
-/* Order here must match "enum type" */
 static const struct x86_cpu_id sbridge_cpuids[] = {
 	ICPU(0x2d, pci_dev_descr_sbridge_table),	/* SANDY_BRIDGE */
 	ICPU(0x3e, pci_dev_descr_ibridge_table),	/* IVY_BRIDGE */
 	ICPU(0x3f, pci_dev_descr_haswell_table),	/* HASWELL */
 	ICPU(0x4f, pci_dev_descr_broadwell_table),	/* BROADWELL */
+	ICPU(0x56, pci_dev_descr_broadwell_table),	/* BROADWELL-DE */
 	ICPU(0x57, pci_dev_descr_knl_table),		/* KNIGHTS_LANDING */
 	{ }
 };
@@ -3398,7 +3399,7 @@ static int sbridge_probe(const struct x86_cpu_id *id)
 			 mc, mc + 1, num_mc);
 
 		sbridge_dev->mc = mc++;
-		rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
+		rc = sbridge_register_mci(sbridge_dev, ptable->type);
 		if (unlikely(rc < 0))
 			goto fail1;
 	}
diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index 8b3226dca1d9..caff46c0e214 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -360,6 +360,8 @@ static int palmas_usb_probe(struct platform_device *pdev)
 
 	palmas_enable_irq(palmas_usb);
 	/* perform initial detection */
+	if (palmas_usb->enable_gpio_vbus_detection)
+		palmas_vbus_irq_handler(palmas_usb->gpio_vbus_irq, palmas_usb);
 	palmas_gpio_id_detect(&palmas_usb->wq_detectid.work);
 	device_set_wakeup_capable(&pdev->dev, true);
 	return 0;
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index a850cbc48d8d..c49d50e68aee 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -174,6 +174,7 @@ static __init void reserve_regions(void)
 {
 	efi_memory_desc_t *md;
 	u64 paddr, npages, size;
+	int resv;
 
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
@@ -190,12 +191,14 @@ static __init void reserve_regions(void)
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
+		resv = is_reserve_region(md);
 		if (efi_enabled(EFI_DBG)) {
 			char buf[64];
 
-			pr_info("  0x%012llx-0x%012llx %s",
+			pr_info("  0x%012llx-0x%012llx %s%s\n",
 				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-				efi_md_typeattr_format(buf, sizeof(buf), md));
+				efi_md_typeattr_format(buf, sizeof(buf), md),
+				resv ? "*" : "");
 		}
 
 		memrange_efi_to_native(&paddr, &npages);
@@ -204,14 +207,9 @@ static __init void reserve_regions(void)
 		if (is_normal_ram(md))
 			early_init_dt_add_memory_arch(paddr, size);
 
-		if (is_reserve_region(md)) {
+		if (resv)
 			memblock_mark_nomap(paddr, size);
-			if (efi_enabled(EFI_DBG))
-				pr_cont("*");
-		}
 
-		if (efi_enabled(EFI_DBG))
-			pr_cont("\n");
 	}
 
 	set_bit(EFI_MEMMAP, &efi.flags);
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 48da857f4774..d7860614f87f 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -33,6 +33,7 @@ config ARCH_REQUIRE_GPIOLIB
 
 menuconfig GPIOLIB
 	bool "GPIO Support"
+	select ANON_INODES
 	help
 	  This enables GPIO support through the generic GPIO library.
 	  You only need to enable this, if you also want to enable
@@ -48,7 +49,7 @@ config GPIO_DEVRES
 
 config OF_GPIO
 	def_bool y
-	depends on OF || COMPILE_TEST
+	depends on OF
 
 config GPIO_ACPI
 	def_bool y
@@ -401,9 +402,12 @@ config GPIO_TB10X
 	select OF_GPIO
 
 config GPIO_TEGRA
-	bool
-	default y
+	bool "NVIDIA Tegra GPIO support"
+	default ARCH_TEGRA
 	depends on ARCH_TEGRA || COMPILE_TEST
+	depends on OF
+	help
+	  Say yes here to support GPIO pins on NVIDIA Tegra SoCs.
 
 config GPIO_TS4800
 	tristate "TS-4800 DIO blocks and compatibles"
@@ -530,7 +534,7 @@ menu "Port-mapped I/O GPIO drivers"
 
 config GPIO_104_DIO_48E
 	tristate "ACCES 104-DIO-48E GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-DIO-48E series (104-DIO-48E,
@@ -540,7 +544,7 @@ config GPIO_104_DIO_48E
 
 config GPIO_104_IDIO_16
 	tristate "ACCES 104-IDIO-16 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16,
@@ -551,7 +555,7 @@ config GPIO_104_IDIO_16
 
 config GPIO_104_IDI_48
 	tristate "ACCES 104-IDI-48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDI-48 family (104-IDI-48A,
@@ -627,7 +631,7 @@ config GPIO_TS5500
 
 config GPIO_WS16C48
 	tristate "WinSystems WS16C48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the WinSystems WS16C48. The base port
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 1a647c07be67..fcf776971ca9 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -75,7 +75,7 @@ static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	unsigned long flags;
 	unsigned control;
@@ -115,7 +115,7 @@ static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned mask = BIT(offset % 8);
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	const unsigned out_port = (io_port > 2) ? io_port + 1 : io_port;
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 6c75c83baf5a..2d2763ea1a68 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id)
 	idi48gpio->irq = irq[id];
 
 	spin_lock_init(&idi48gpio->lock);
+	spin_lock_init(&idi48gpio->ack_lock);
 
 	dev_set_drvdata(dev, idi48gpio);
 
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 9aabc48ff5de..953e4b829e32 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -547,11 +547,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio)
 	/* disable interrupts and clear status */
 	for (i = 0; i < kona_gpio->num_bank; i++) {
 		/* Unlock the entire bank first */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE);
 		writel(0xffffffff, reg_base + GPIO_INT_MASK(i));
 		writel(0xffffffff, reg_base + GPIO_INT_STATUS(i));
 		/* Now re-lock the bank */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE);
 	}
 }
 
diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index e85e7539cf5d..eb43ae4835c1 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -61,9 +61,8 @@ static unsigned sch_gpio_bit(struct sch_gpio *sch, unsigned gpio)
 	return gpio % 8;
 }
 
-static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
+static int sch_gpio_reg_get(struct sch_gpio *sch, unsigned gpio, unsigned reg)
 {
-	struct sch_gpio *sch = gpiochip_get_data(gc);
 	unsigned short offset, bit;
 	u8 reg_val;
 
@@ -75,10 +74,9 @@ static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg)
 	return reg_val;
 }
 
-static void sch_gpio_reg_set(struct gpio_chip *gc, unsigned gpio, unsigned reg,
+static void sch_gpio_reg_set(struct sch_gpio *sch, unsigned gpio, unsigned reg,
 			     int val)
 {
-	struct sch_gpio *sch = gpiochip_get_data(gc);
 	unsigned short offset, bit;
 	u8 reg_val;
 
@@ -98,14 +96,15 @@ static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned gpio_num)
 	struct sch_gpio *sch = gpiochip_get_data(gc);
 
 	spin_lock(&sch->lock);
-	sch_gpio_reg_set(gc, gpio_num, GIO, 1);
+	sch_gpio_reg_set(sch, gpio_num, GIO, 1);
 	spin_unlock(&sch->lock);
 	return 0;
 }
 
 static int sch_gpio_get(struct gpio_chip *gc, unsigned gpio_num)
 {
-	return sch_gpio_reg_get(gc, gpio_num, GLV);
+	struct sch_gpio *sch = gpiochip_get_data(gc);
+	return sch_gpio_reg_get(sch, gpio_num, GLV);
 }
 
 static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
@@ -113,7 +112,7 @@ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val)
 	struct sch_gpio *sch = gpiochip_get_data(gc);
 
 	spin_lock(&sch->lock);
-	sch_gpio_reg_set(gc, gpio_num, GLV, val);
+	sch_gpio_reg_set(sch, gpio_num, GLV, val);
 	spin_unlock(&sch->lock);
 }
 
@@ -123,7 +122,7 @@ static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned gpio_num,
 	struct sch_gpio *sch = gpiochip_get_data(gc);
 
 	spin_lock(&sch->lock);
-	sch_gpio_reg_set(gc, gpio_num, GIO, 0);
+	sch_gpio_reg_set(sch, gpio_num, GIO, 0);
 	spin_unlock(&sch->lock);
 
 	/*
@@ -182,13 +181,13 @@ static int sch_gpio_probe(struct platform_device *pdev)
 		 * GPIO7 is configured by the CMC as SLPIOVR
 		 * Enable GPIO[9:8] core powered gpios explicitly
 		 */
-		sch_gpio_reg_set(&sch->chip, 8, GEN, 1);
-		sch_gpio_reg_set(&sch->chip, 9, GEN, 1);
+		sch_gpio_reg_set(sch, 8, GEN, 1);
+		sch_gpio_reg_set(sch, 9, GEN, 1);
 		/*
 		 * SUS_GPIO[2:0] enabled by default
 		 * Enable SUS_GPIO3 resume powered gpio explicitly
 		 */
-		sch_gpio_reg_set(&sch->chip, 13, GEN, 1);
+		sch_gpio_reg_set(sch, 13, GEN, 1);
 		break;
 
 	case PCI_DEVICE_ID_INTEL_ITC_LPC:
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index ec891a27952f..661b0e34e067 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -98,7 +98,6 @@ struct tegra_gpio_info {
 	const struct tegra_gpio_soc_config	*soc;
 	struct gpio_chip			gc;
 	struct irq_chip				ic;
-	struct lock_class_key			lock_class;
 	u32					bank_count;
 };
 
@@ -547,6 +546,12 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
 };
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different category
+ * than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
 	const struct tegra_gpio_soc_config *config;
@@ -660,7 +665,7 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
 		bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-		irq_set_lockdep_class(irq, &tgi->lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class);
 		irq_set_chip_data(irq, bank);
 		irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
 	}
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index 75c6355b018d..e72794e463aa 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -709,7 +709,13 @@ static int zynq_gpio_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "input clock not found.\n");
 		return PTR_ERR(gpio->clk);
 	}
+	ret = clk_prepare_enable(gpio->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
 
+	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0)
@@ -747,6 +753,7 @@ err_pm_put:
 	pm_runtime_put(&pdev->dev);
 err_pm_dis:
 	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(gpio->clk);
 
 	return ret;
 }
diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c
index 3a5c7011ad3b..8b830996fe02 100644
--- a/drivers/gpio/gpiolib-legacy.c
+++ b/drivers/gpio/gpiolib-legacy.c
@@ -28,6 +28,10 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
 	if (!desc && gpio_is_valid(gpio))
 		return -EPROBE_DEFER;
 
+	err = gpiod_request(desc, label);
+	if (err)
+		return err;
+
 	if (flags & GPIOF_OPEN_DRAIN)
 		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
 
@@ -37,10 +41,6 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
 	if (flags & GPIOF_ACTIVE_LOW)
 		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
 
-	err = gpiod_request(desc, label);
-	if (err)
-		return err;
-
 	if (flags & GPIOF_DIR_IN)
 		err = gpiod_direction_input(desc);
 	else
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d22dcc38179d..4aabddb38b59 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/io-mapping.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 24f60d28f0c0..be74bd370f1f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -449,7 +449,6 @@ static void gpiodevice_release(struct device *dev)
 {
 	struct gpio_device *gdev = dev_get_drvdata(dev);
 
-	cdev_del(&gdev->chrdev);
 	list_del(&gdev->list);
 	ida_simple_remove(&gpio_ida, gdev->id);
 	kfree(gdev->label);
@@ -482,7 +481,6 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
 
 	/* From this point, the .release() function cleans up gpio_device */
 	gdev->dev.release = gpiodevice_release;
-	get_device(&gdev->dev);
 	pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n",
 		 __func__, gdev->base, gdev->base + gdev->ngpio - 1,
 		 dev_name(&gdev->dev), gdev->chip->label ? : "generic");
@@ -770,6 +768,8 @@ void gpiochip_remove(struct gpio_chip *chip)
 	 * be removed, else it will be dangling until the last user is
 	 * gone.
 	 */
+	cdev_del(&gdev->chrdev);
+	device_del(&gdev->dev);
 	put_device(&gdev->dev);
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -869,7 +869,7 @@ struct gpio_chip *gpiochip_find(void *data,
 
 	spin_lock_irqsave(&gpio_lock, flags);
 	list_for_each_entry(gdev, &gpio_devices, list)
-		if (match(gdev->chip, data))
+		if (gdev->chip && match(gdev->chip, data))
 			break;
 
 	/* No match? */
@@ -1352,14 +1352,6 @@ static int __gpiod_request(struct gpio_desc *desc, const char *label)
 		spin_lock_irqsave(&gpio_lock, flags);
 	}
 done:
-	if (status < 0) {
-		/* Clear flags that might have been set by the caller before
-		 * requesting the GPIO.
-		 */
-		clear_bit(FLAG_ACTIVE_LOW, &desc->flags);
-		clear_bit(FLAG_OPEN_DRAIN, &desc->flags);
-		clear_bit(FLAG_OPEN_SOURCE, &desc->flags);
-	}
 	spin_unlock_irqrestore(&gpio_lock, flags);
 	return status;
 }
@@ -1373,8 +1365,12 @@ done:
 #define VALIDATE_DESC(desc) do { \
 	if (!desc) \
 		return 0; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return PTR_ERR(desc); \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return -EINVAL; \
 	} \
 	if ( !desc->gdev->chip ) { \
@@ -1386,8 +1382,12 @@ done:
 #define VALIDATE_DESC_VOID(desc) do { \
 	if (!desc) \
 		return; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return; \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return; \
 	} \
 	if (!desc->gdev->chip) { \
@@ -2056,7 +2056,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	struct gpio_chip *chip;
 	int offset;
 
-	VALIDATE_DESC(desc);
+	/*
+	 * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
+	 * requires this function to not return zero on an invalid descriptor
+	 * but rather a negative error number.
+	 */
+	if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
+		return -EINVAL;
+
 	chip = desc->gdev->chip;
 	offset = gpio_chip_hwgpio(desc);
 	if (chip->to_irq) {
@@ -2572,28 +2579,13 @@ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(gpiod_get_optional);
 
-/**
- * gpiod_parse_flags - helper function to parse GPIO lookup flags
- * @desc:	gpio to be setup
- * @lflags:	gpio_lookup_flags - returned from of_find_gpio() or
- *		of_get_gpio_hog()
- *
- * Set the GPIO descriptor flags based on the given GPIO lookup flags.
- */
-static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
-{
-	if (lflags & GPIO_ACTIVE_LOW)
-		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
-	if (lflags & GPIO_OPEN_DRAIN)
-		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
-	if (lflags & GPIO_OPEN_SOURCE)
-		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
-}
 
 /**
  * gpiod_configure_flags - helper function to configure a given GPIO
  * @desc:	gpio whose value will be assigned
  * @con_id:	function within the GPIO consumer
+ * @lflags:	gpio_lookup_flags - returned from of_find_gpio() or
+ *		of_get_gpio_hog()
  * @dflags:	gpiod_flags - optional GPIO initialization flags
  *
  * Return 0 on success, -ENOENT if no GPIO has been assigned to the
@@ -2601,10 +2593,17 @@ static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags)
  * occurred while trying to acquire the GPIO.
  */
 static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
-				 enum gpiod_flags dflags)
+		unsigned long lflags, enum gpiod_flags dflags)
 {
 	int status;
 
+	if (lflags & GPIO_ACTIVE_LOW)
+		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
+	if (lflags & GPIO_OPEN_DRAIN)
+		set_bit(FLAG_OPEN_DRAIN, &desc->flags);
+	if (lflags & GPIO_OPEN_SOURCE)
+		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
+
 	/* No particular flag request, return here... */
 	if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
 		pr_debug("no flags found for %s\n", con_id);
@@ -2671,13 +2670,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 		return desc;
 	}
 
-	gpiod_parse_flags(desc, lookupflags);
-
 	status = gpiod_request(desc, con_id);
 	if (status < 0)
 		return ERR_PTR(status);
 
-	status = gpiod_configure_flags(desc, con_id, flags);
+	status = gpiod_configure_flags(desc, con_id, lookupflags, flags);
 	if (status < 0) {
 		dev_dbg(dev, "setup of GPIO %s failed\n", con_id);
 		gpiod_put(desc);
@@ -2733,6 +2730,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 	if (IS_ERR(desc))
 		return desc;
 
+	ret = gpiod_request(desc, NULL);
+	if (ret)
+		return ERR_PTR(ret);
+
 	if (active_low)
 		set_bit(FLAG_ACTIVE_LOW, &desc->flags);
 
@@ -2743,10 +2744,6 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 			set_bit(FLAG_OPEN_SOURCE, &desc->flags);
 	}
 
-	ret = gpiod_request(desc, NULL);
-	if (ret)
-		return ERR_PTR(ret);
-
 	return desc;
 }
 EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod);
@@ -2799,8 +2796,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
 	chip = gpiod_to_chip(desc);
 	hwnum = gpio_chip_hwgpio(desc);
 
-	gpiod_parse_flags(desc, lflags);
-
 	local_desc = gpiochip_request_own_desc(chip, hwnum, name);
 	if (IS_ERR(local_desc)) {
 		status = PTR_ERR(local_desc);
@@ -2809,7 +2804,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
 		return status;
 	}
 
-	status = gpiod_configure_flags(desc, name, dflags);
+	status = gpiod_configure_flags(desc, name, lflags, dflags);
 	if (status < 0) {
 		pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n",
 		       name, chip->label, hwnum, status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da3d02154fa6..8ebc5f1eb4c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -85,8 +85,12 @@ extern int amdgpu_vm_debug;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
+extern int amdgpu_powercontainment;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_cg_mask;
+extern unsigned amdgpu_pg_mask;
+extern char *amdgpu_disable_cu;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -183,6 +187,10 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type);
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type);
 
 struct amdgpu_ip_block_version {
 	enum amd_ip_block_type type;
@@ -298,13 +306,16 @@ struct amdgpu_ring_funcs {
 				uint32_t oa_base, uint32_t oa_size);
 	/* testing functions */
 	int (*test_ring)(struct amdgpu_ring *ring);
-	int (*test_ib)(struct amdgpu_ring *ring);
+	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
 	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+	/* note usage for clock and power gating */
+	void (*begin_use)(struct amdgpu_ring *ring);
+	void (*end_use)(struct amdgpu_ring *ring);
 };
 
 /*
@@ -594,11 +605,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
@@ -754,12 +763,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
+void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-void amdgpu_job_free_func(struct kref *refcount);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
-void amdgpu_job_timeout_func(struct work_struct *work);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -767,12 +775,9 @@ struct amdgpu_ring {
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;
 
-	spinlock_t              fence_lock;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
-	u64			next_rptr_gpu_addr;
-	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
@@ -791,14 +796,16 @@ struct amdgpu_ring {
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
-	unsigned		next_rptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	enum amdgpu_ring_type	type;
 	char			name[16];
 	unsigned		cond_exe_offs;
-	u64				cond_exe_gpu_addr;
-	volatile u32	*cond_exe_cpu_addr;
+	u64			cond_exe_gpu_addr;
+	volatile u32		*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
 };
 
 /*
@@ -861,6 +868,7 @@ struct amdgpu_vm {
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
 	struct fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;
 
 	/* array of page tables, one for each page directory entry */
 	struct amdgpu_vm_pt	*page_tables;
@@ -883,13 +891,14 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
-	struct amdgpu_ring      *last_user;
 	atomic64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;
 
+	uint32_t                current_gpu_reset_count;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;
@@ -905,6 +914,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
 
+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
@@ -926,17 +939,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr);
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
@@ -1142,6 +1152,12 @@ struct amdgpu_cu_info {
 	uint32_t bitmap[4][4];
 };
 
+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+};
+
 struct amdgpu_gfx {
 	struct mutex			gpu_clock_mutex;
 	struct amdgpu_gca_config	config;
@@ -1178,6 +1194,7 @@ struct amdgpu_gfx {
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1195,10 +1212,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data);
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, u32 nop, u32 align_mask,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
@@ -1250,6 +1263,7 @@ struct amdgpu_job {
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		ctx;
+	bool                    vm_needs_flush;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1257,8 +1271,7 @@ struct amdgpu_job {
 	uint32_t		oa_base, oa_size;
 
 	/* user fence handling */
-	struct amdgpu_bo	*uf_bo;
-	uint32_t		uf_offset;
+	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
 
 };
@@ -1560,6 +1573,12 @@ struct amdgpu_dpm_funcs {
 	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
 	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
 	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
 };
 
 struct amdgpu_dpm {
@@ -1662,6 +1681,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
+	bool			use_ctx_buf;
 	struct amd_sched_entity entity;
 };
 
@@ -1683,6 +1703,7 @@ struct amdgpu_vce {
 	struct drm_file		*filp[AMDGPU_MAX_VCE_HANDLES];
 	uint32_t		img_size[AMDGPU_MAX_VCE_HANDLES];
 	struct delayed_work	idle_work;
+	struct mutex		idle_mutex;
 	const struct firmware	*fw;	/* VCE firmware */
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
@@ -1767,6 +1788,8 @@ int amdgpu_debugfs_init(struct drm_minor *minor);
 void amdgpu_debugfs_cleanup(struct drm_minor *minor);
 #endif
 
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+
 /*
  * amdgpu smumgr functions
  */
@@ -1811,15 +1834,13 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
-	/* wait for mc_idle */
-	int (*wait_for_mc_idle)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+	/* query virtual capabilities */
+	u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1914,8 +1935,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
 
 
 /* GPU virtualization */
+#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
+#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 struct amdgpu_virtualization {
 	bool supports_sr_iov;
+	bool is_virtual;
+	u32 caps;
 };
 
 /*
@@ -1997,6 +2022,10 @@ struct amdgpu_device {
 	spinlock_t didt_idx_lock;
 	amdgpu_rreg_t			didt_rreg;
 	amdgpu_wreg_t			didt_wreg;
+	/* protects concurrent gc_cac register access */
+	spinlock_t gc_cac_idx_lock;
+	amdgpu_rreg_t			gc_cac_rreg;
+	amdgpu_wreg_t			gc_cac_wreg;
 	/* protects concurrent ENDPOINT (audio) register access */
 	spinlock_t audio_endpt_idx_lock;
 	amdgpu_block_rreg_t		audio_endpt_rreg;
@@ -2022,6 +2051,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;
 
 	/* display */
@@ -2125,6 +2155,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
 #define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)				\
@@ -2200,11 +2232,10 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
-#define amdgpu_asic_wait_for_mc_idle(adev) (adev)->asic_funcs->wait_for_mc_idle((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
-#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
+#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
@@ -2215,7 +2246,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
+#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@@ -2257,6 +2288,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
 #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 
 #define amdgpu_dpm_get_temperature(adev) \
 	((adev)->pp_enabled ?						\
@@ -2335,6 +2368,18 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_force_clock_level(adev, type, level) \
 		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
 
+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2376,9 +2421,13 @@ bool amdgpu_device_is_px(struct drm_device *dev);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
 void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 32809f749903..d080d0807a5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -240,8 +240,8 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
 
-	if (rdev->asic_funcs->get_gpu_clock_counter)
-		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	if (rdev->gfx.funcs->get_gpu_clock_counter)
+		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9df1bcb35bf0..983175363b06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -551,28 +551,19 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 		    le16_to_cpu(firmware_info->info.usReferenceClock);
 		ppll->reference_div = 0;
 
-		if (crev < 2)
-			ppll->pll_out_min =
-				le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
-		else
-			ppll->pll_out_min =
-				le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+		ppll->pll_out_min =
+			le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
 		ppll->pll_out_max =
 		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
 
-		if (crev >= 4) {
-			ppll->lcd_pll_out_min =
-				le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_min == 0)
-				ppll->lcd_pll_out_min = ppll->pll_out_min;
-			ppll->lcd_pll_out_max =
-				le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_max == 0)
-				ppll->lcd_pll_out_max = ppll->pll_out_max;
-		} else {
+		ppll->lcd_pll_out_min =
+			le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_min == 0)
 			ppll->lcd_pll_out_min = ppll->pll_out_min;
+		ppll->lcd_pll_out_max =
+			le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_max == 0)
 			ppll->lcd_pll_out_max = ppll->pll_out_max;
-		}
 
 		if (ppll->pll_out_min == 0)
 			ppll->pll_out_min = 64800;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 35a1248aaa77..49de92600074 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "amd_acpi.h"
 
@@ -27,6 +28,7 @@ struct amdgpu_atpx_functions {
 struct amdgpu_atpx {
 	acpi_handle handle;
 	struct amdgpu_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct amdgpu_atpx_priv {
@@ -63,6 +65,14 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void) {
+	return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,18 +152,12 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -172,19 +176,43 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -259,6 +287,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
@@ -507,7 +539,6 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.switchto = amdgpu_atpx_switchto,
 	.power_state = amdgpu_atpx_power_state,
-	.init = amdgpu_atpx_init,
 	.get_client_id = amdgpu_atpx_get_client_id,
 };
 
@@ -542,6 +573,7 @@ static bool amdgpu_atpx_detect(void)
 		printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		amdgpu_atpx_priv.atpx_detected = true;
+		amdgpu_atpx_init();
 		return true;
 	}
 	return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 99ca75baa47d..2b6afe123f3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -352,22 +352,22 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
 	uint16_t tmp, bios_header_start;
 
 	r = amdgpu_atrm_get_bios(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_acpi_vfct_bios(adev);
-	if (r == false)
+	if (!r)
 		r = igp_read_bios_from_vram(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_read_bios(adev);
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_bios_from_rom(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_disabled_bios(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_platform_bios(adev);
 	}
-	if (r == false || adev->bios == NULL) {
+	if (!r || adev->bios == NULL) {
 		DRM_ERROR("Unable to locate a BIOS ROM\n");
 		adev->bios = NULL;
 		return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 823bf5e0b0c8..651115dcce12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -94,6 +94,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
+	unsigned long total_size = 0;
 
 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@@ -140,6 +141,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;
 
+		total_size += amdgpu_bo_size(entry->robj);
 		trace_amdgpu_bo_list_set(list, entry->robj);
 	}
 
@@ -155,6 +157,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->array = array;
 	list->num_entries = num_entries;
 
+	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 	return 0;
 
 error_free:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 199f76baf22c..bc0440f7a31d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -312,6 +312,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
 		return RREG32_UVD_CTX(index);
 	case CGS_IND_REG__DIDT:
 		return RREG32_DIDT(index);
+	case CGS_IND_REG_GC_CAC:
+		return RREG32_GC_CAC(index);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return 0;
@@ -336,6 +338,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 		return WREG32_UVD_CTX(index, value);
 	case CGS_IND_REG__DIDT:
 		return WREG32_DIDT(index, value);
+	case CGS_IND_REG_GC_CAC:
+		return WREG32_GC_CAC(index, value);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return;
@@ -696,6 +700,17 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 	return result;
 }
 
+static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
+{
+	CGS_FUNC_ADEV;
+	if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
+		release_firmware(adev->pm.fw);
+		return 0;
+	}
+	/* cannot release other firmware because they are not created by cgs */
+	return -EINVAL;
+}
+
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type,
 					struct cgs_firmware_info *info)
@@ -737,6 +752,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 
 		if (!adev->pm.fw) {
 			switch (adev->asic_type) {
+			case CHIP_TOPAZ:
+				strcpy(fw_name, "amdgpu/topaz_smc.bin");
+				break;
 			case CHIP_TONGA:
 				strcpy(fw_name, "amdgpu/tonga_smc.bin");
 				break;
@@ -776,6 +794,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 		}
 
 		hdr = (const struct smc_firmware_header_v1_0 *)	adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(&hdr->header);
 		adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
 		ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
@@ -784,13 +803,14 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 
 		info->version = adev->pm.fw_version;
 		info->image_size = ucode_size;
+		info->ucode_start_address = ucode_start_address;
 		info->kptr = (void *)src;
 	}
 	return 0;
 }
 
 static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
-				struct cgs_system_info *sys_info)
+					struct cgs_system_info *sys_info)
 {
 	CGS_FUNC_ADEV;
 
@@ -810,6 +830,12 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_PCIE_MLW:
 		sys_info->value = adev->pm.pcie_mlw_mask;
 		break;
+	case CGS_SYSTEM_INFO_PCIE_DEV:
+		sys_info->value = adev->pdev->device;
+		break;
+	case CGS_SYSTEM_INFO_PCIE_REV:
+		sys_info->value = adev->pdev->revision;
+		break;
 	case CGS_SYSTEM_INFO_CG_FLAGS:
 		sys_info->value = adev->cg_flags;
 		break;
@@ -819,6 +845,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_GFX_CU_INFO:
 		sys_info->value = adev->gfx.cu_info.number;
 		break;
+	case CGS_SYSTEM_INFO_GFX_SE_INFO:
+		sys_info->value = adev->gfx.config.max_shader_engines;
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -892,14 +921,12 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	acpi_handle handle;
 	struct acpi_object_list input;
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *params = NULL;
-	union acpi_object *obj = NULL;
+	union acpi_object *params, *obj;
 	uint8_t name[5] = {'\0'};
-	struct cgs_acpi_method_argument *argument = NULL;
+	struct cgs_acpi_method_argument *argument;
 	uint32_t i, count;
 	acpi_status status;
 	int result;
-	uint32_t func_no = 0xFFFFFFFF;
 
 	handle = ACPI_HANDLE(&adev->pdev->dev);
 	if (!handle)
@@ -916,7 +943,6 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if (info->pinput_argument == NULL)
 			return -EINVAL;
 		argument = info->pinput_argument;
-		func_no = argument->value;
 		for (i = 0; i < info->input_count; i++) {
 			if (((argument->type == ACPI_TYPE_STRING) ||
 			     (argument->type == ACPI_TYPE_BUFFER)) &&
@@ -961,11 +987,11 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 				params->integer.value = argument->value;
 				break;
 			case ACPI_TYPE_STRING:
-				params->string.length = argument->method_length;
+				params->string.length = argument->data_length;
 				params->string.pointer = argument->pointer;
 				break;
 			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->method_length;
+				params->buffer.length = argument->data_length;
 				params->buffer.pointer = argument->pointer;
 				break;
 			default:
@@ -985,7 +1011,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 
 	if (ACPI_FAILURE(status)) {
 		result = -EIO;
-		goto error;
+		goto free_input;
 	}
 
 	/* return the output info */
@@ -995,7 +1021,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if ((obj->type != ACPI_TYPE_PACKAGE) ||
 			(obj->package.count != count)) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		params = obj->package.elements;
 	} else
@@ -1003,13 +1029,13 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 
 	if (params == NULL) {
 		result = -EIO;
-		goto error;
+		goto free_obj;
 	}
 
 	for (i = 0; i < count; i++) {
 		if (argument->type != params->type) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		switch (params->type) {
 		case ACPI_TYPE_INTEGER:
@@ -1019,7 +1045,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 			if ((params->string.length != argument->data_length) ||
 				(params->string.pointer == NULL)) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			strncpy(argument->pointer,
 				params->string.pointer,
@@ -1028,7 +1054,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		case ACPI_TYPE_BUFFER:
 			if (params->buffer.pointer == NULL) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			memcpy(argument->pointer,
 				params->buffer.pointer,
@@ -1041,9 +1067,10 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		params++;
 	}
 
-error:
-	if (obj != NULL)
-		kfree(obj);
+	result = 0;
+free_obj:
+	kfree(obj);
+free_input:
 	kfree((void *)input.pointer);
 	return result;
 }
@@ -1055,7 +1082,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 }
 #endif
 
-int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
+static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 					uint32_t acpi_method,
 					uint32_t acpi_function,
 					void *pinput, void *poutput,
@@ -1068,17 +1095,14 @@ int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_info info = {0};
 
 	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].method_length = sizeof(uint32_t);
 	acpi_input[0].data_length = sizeof(uint32_t);
 	acpi_input[0].value = acpi_function;
 
 	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_input[1].data_length = input_size;
 	acpi_input[1].pointer = pinput;
 
 	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_output.data_length = output_size;
 	acpi_output.pointer = poutput;
 
@@ -1125,6 +1149,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	amdgpu_cgs_pm_query_clock_limits,
 	amdgpu_cgs_set_camera_voltages,
 	amdgpu_cgs_get_firmware_info,
+	amdgpu_cgs_rel_firmware,
 	amdgpu_cgs_set_powergating_state,
 	amdgpu_cgs_set_clockgating_state,
 	amdgpu_cgs_get_active_displays_info,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index cb07da41152b..ff0b55a65ca3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 						   DRM_MODE_SCALE_NONE);
 			/* no HPD on analog connectors */
 			amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 	}
 
 	if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9bc8f1d99733..0307ff5887c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -216,11 +216,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
-	if (p->uf_entry.robj) {
-		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
-		p->job->uf_offset = uf_offset;
-	}
-
+	if (p->uf_entry.robj)
+		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 	return 0;
 
@@ -459,7 +456,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
 
 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
@@ -472,6 +469,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;
 
+	fpriv->vm.last_eviction_counter =
+		atomic64_read(&p->adev->num_evictions);
+
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -499,6 +499,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}
 
+	if (p->uf_entry.robj)
+		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+
 error_validate:
 	if (r) {
 		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
@@ -653,18 +656,21 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
+		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	}
+	} else {
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 
-	r = amdgpu_bo_vm_update_pte(p, vm);
-	if (!r)
-		amdgpu_cs_sync_rings(p);
+		r = amdgpu_bo_vm_update_pte(p, vm);
+		if (r)
+			return r;
+	}
 
-	return r;
+	return amdgpu_cs_sync_rings(p);
 }
 
 static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
@@ -761,7 +767,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_bo && (
+	if (parser->job->uf_addr && (
 	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
 	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
@@ -830,17 +836,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_ring *ring = p->job->ring;
 	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	struct fence *fence;
 	struct amdgpu_job *job;
 	int r;
 
 	job = p->job;
 	p->job = NULL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func,
-			       p->filp, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		return r;
@@ -848,9 +850,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	job->owner = p->filp;
 	job->ctx = entity->fence_context;
-	p->fence = fence_get(fence);
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	p->fence = fence_get(&job->base.s_fence->finished);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
+	amdgpu_job_free_resources(job);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bb8b149786d7..df7ab2458e50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -35,6 +36,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"
@@ -79,24 +81,27 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			bool always_indirect)
 {
+	uint32_t ret;
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
-		return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
 		unsigned long flags;
-		uint32_t ret;
 
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-
-		return ret;
 	}
+	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+	return ret;
 }
 
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    bool always_indirect)
 {
+	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+	
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
@@ -827,8 +832,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
  */
 static void amdgpu_atombios_fini(struct amdgpu_device *adev)
 {
-	if (adev->mode_info.atom_context)
+	if (adev->mode_info.atom_context) {
 		kfree(adev->mode_info.atom_context->scratch);
+		kfree(adev->mode_info.atom_context->iio);
+	}
 	kfree(adev->mode_info.atom_context);
 	adev->mode_info.atom_context = NULL;
 	kfree(adev->mode_info.atom_card_info);
@@ -1068,11 +1075,14 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
@@ -1085,16 +1095,53 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
 }
 
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type) {
+			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+			if (r)
+				return r;
+			break;
+		}
+	}
+	return 0;
+
+}
+
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type)
+{
+	int i;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type)
+			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+	}
+	return true;
+
+}
+
 const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
 					struct amdgpu_device *adev,
 					enum amd_ip_block_type type)
@@ -1207,6 +1254,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		}
 	}
 
+	adev->cg_flags &= amdgpu_cg_mask;
+	adev->pg_flags &= amdgpu_pg_mask;
+
 	return 0;
 }
 
@@ -1325,6 +1375,11 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 		adev->ip_block_status[i].valid = false;
 	}
 
+	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+		if (adev->ip_blocks[i].funcs->late_fini)
+			adev->ip_blocks[i].funcs->late_fini((void *)adev);
+	}
+
 	return 0;
 }
 
@@ -1378,6 +1433,15 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static bool amdgpu_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -1424,9 +1488,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
+	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
 
+
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
@@ -1451,6 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
+	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 
 	adev->rmmio_base = pci_resource_start(adev->pdev, 5);
@@ -1495,29 +1563,38 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
+	if (!amdgpu_get_bios(adev)) {
+		r = -EINVAL;
+		goto failed;
+	}
 	/* Must be an ATOMBIOS */
 	if (!adev->is_atom_bios) {
 		dev_err(adev->dev, "Expecting atombios for GPU\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto failed;
 	}
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* See if the asic supports SR-IOV */
 	adev->virtualization.supports_sr_iov =
 		amdgpu_atombios_has_gpu_virtualization_table(adev);
 
+	/* Check if we are executing in a virtualized environment */
+	adev->virtualization.is_virtual = amdgpu_device_is_virtual();
+	adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev);
+
 	/* Post card if necessary */
 	if (!amdgpu_card_posted(adev) ||
-	    adev->virtualization.supports_sr_iov) {
+	    (adev->virtualization.is_virtual &&
+	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto failed;
 		}
 		DRM_INFO("GPU not posted. posting now...\n");
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1527,7 +1604,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_get_clock_info(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-		return r;
+		goto failed;
 	}
 	/* init i2c buses */
 	amdgpu_atombios_i2c_init(adev);
@@ -1536,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* init the mode config */
@@ -1546,7 +1623,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		amdgpu_fini(adev);
-		return r;
+		goto failed;
 	}
 
 	adev->accel_working = true;
@@ -1556,7 +1633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		return r;
+		goto failed;
 	}
 
 	r = amdgpu_ib_ring_tests(adev);
@@ -1573,6 +1650,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 	}
 
+	r = amdgpu_debugfs_firmware_init(adev);
+	if (r) {
+		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+		return r;
+	}
+
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
 			amdgpu_test_moves(adev);
@@ -1598,10 +1681,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	return r;
 }
 
 static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev);
@@ -1624,6 +1712,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
+	drm_crtc_force_disable_all(adev->ddev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
 	kfree(adev->ip_block_status);
@@ -1635,6 +1724,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
 	vga_switcheroo_unregister_client(adev->pdev);
+	if (adev->flags & AMD_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
 		pci_iounmap(adev->pdev, adev->rio_mem);
@@ -1820,7 +1911,23 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	drm_kms_helper_poll_enable(dev);
+
+	/*
+	 * Most of the connector probing functions try to acquire runtime pm
+	 * refs to ensure that the GPU is powered on when connector polling is
+	 * performed. Since we're calling this from a runtime PM callback,
+	 * trying to acquire rpm refs will cause us to deadlock.
+	 *
+	 * Since we're guaranteed to be holding the rpm lock, it's safe to
+	 * temporarily disable the rpm helpers so this doesn't deadlock us.
+	 */
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth++;
+#endif
 	drm_helper_hpd_irq_event(dev);
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth--;
+#endif
 
 	if (fbcon) {
 		amdgpu_fbdev_set_suspend(adev, 0);
@@ -1840,11 +1947,6 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
  */
 int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
-	unsigned ring_sizes[AMDGPU_MAX_RINGS];
-	uint32_t *ring_data[AMDGPU_MAX_RINGS];
-
-	bool saved = false;
-
 	int i, r;
 	int resched;
 
@@ -1853,22 +1955,30 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	r = amdgpu_suspend(adev);
-
+	/* block scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring)
 			continue;
-
-		ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);
-		if (ring_sizes[i]) {
-			saved = true;
-			dev_info(adev->dev, "Saved %d dwords of commands "
-				 "on ring %d.\n", ring_sizes[i], i);
-		}
+		kthread_park(ring->sched.thread);
+		amd_sched_hw_job_reset(&ring->sched);
 	}
+	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+	amdgpu_fence_driver_force_completion(adev);
+
+	/* save scratch */
+	amdgpu_atombios_scratch_regs_save(adev);
+	r = amdgpu_suspend(adev);
 
 retry:
+	/* Disable fb access */
+	if (adev->mode_info.num_crtc) {
+		struct amdgpu_mode_mc_save save;
+		amdgpu_display_stop_mc_access(adev, &save);
+		amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
+	}
+
 	r = amdgpu_asic_reset(adev);
 	/* post card */
 	amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1877,32 +1987,29 @@ retry:
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
 	}
-
+	/* restore scratch */
+	amdgpu_atombios_scratch_regs_restore(adev);
 	if (!r) {
+		r = amdgpu_ib_ring_tests(adev);
+		if (r) {
+			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+			r = amdgpu_suspend(adev);
+			goto retry;
+		}
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
-
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = amdgpu_ib_ring_tests(adev);
-		if (r) {
-			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				r = amdgpu_suspend(adev);
-				goto retry;
-			}
+			amd_sched_job_recovery(&ring->sched);
+			kthread_unpark(ring->sched.thread);
 		}
 	} else {
-		amdgpu_fence_driver_force_completion(adev);
+		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i])
-				kfree(ring_data[i]);
+			if (adev->rings[i]) {
+				kthread_unpark(adev->rings[i]->sched.thread);
+			}
 		}
 	}
 
@@ -1913,13 +2020,11 @@ retry:
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 	}
+	amdgpu_irq_gpu_reset_resume_helper(adev);
 
 	return r;
 }
 
-#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
-#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
-
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
@@ -2073,20 +2178,43 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool use_bank;
+	unsigned instance_bank, sh_bank, se_bank;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+		use_bank = 1;
+		*pos &= 0xFFFFFF;
+	} else {
+		use_bank = 0;
+	}
+
+	if (use_bank) {
+		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
+		    se_bank >= adev->gfx.config.max_shader_engines)
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
 	while (size) {
 		uint32_t value;
 
 		if (*pos > adev->rmmio_size)
-			return result;
+			goto end;
 
 		value = RREG32(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
+		if (r) {
+			result = r;
+			goto end;
+		}
 
 		result += 4;
 		buf += 4;
@@ -2094,6 +2222,12 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
 	return result;
 }
 
@@ -2293,6 +2427,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	return result;
 }
 
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	ssize_t result = 0;
+	int r;
+	uint32_t *config, no_regs = 0;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	config = kmalloc(256 * sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	/* version, increment each time something is added */
+	config[no_regs++] = 0;
+	config[no_regs++] = adev->gfx.config.max_shader_engines;
+	config[no_regs++] = adev->gfx.config.max_tile_pipes;
+	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+	config[no_regs++] = adev->gfx.config.max_sh_per_se;
+	config[no_regs++] = adev->gfx.config.max_backends_per_se;
+	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+	config[no_regs++] = adev->gfx.config.max_gprs;
+	config[no_regs++] = adev->gfx.config.max_gs_threads;
+	config[no_regs++] = adev->gfx.config.max_hw_contexts;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.num_tile_pipes;
+	config[no_regs++] = adev->gfx.config.backend_enable_mask;
+	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+	config[no_regs++] = adev->gfx.config.num_gpus;
+	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+	config[no_regs++] = adev->gfx.config.gb_addr_config;
+	config[no_regs++] = adev->gfx.config.num_rbs;
+
+	while (size && (*pos < no_regs * 4)) {
+		uint32_t value;
+
+		value = config[*pos >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			kfree(config);
+			return r;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	kfree(config);
+	return result;
+}
+
+
 static const struct file_operations amdgpu_debugfs_regs_fops = {
 	.owner = THIS_MODULE,
 	.read = amdgpu_debugfs_regs_read,
@@ -2318,11 +2514,18 @@ static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gca_config_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs_didt_fops,
 	&amdgpu_debugfs_regs_pcie_fops,
 	&amdgpu_debugfs_regs_smc_fops,
+	&amdgpu_debugfs_gca_config_fops,
 };
 
 static const char *debugfs_regs_names[] = {
@@ -2330,6 +2533,7 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_regs_didt",
 	"amdgpu_regs_pcie",
 	"amdgpu_regs_smc",
+	"amdgpu_gca_config",
 };
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index a6eecf6f9065..76f96028313d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -122,7 +122,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
-	};
+	}
 
 	if (!repcnt)
 		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
@@ -220,19 +220,17 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 
 	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto cleanup;
+		goto unreserve;
 	}
 
 	r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
 					      &work->shared_count,
 					      &work->shared);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		DRM_ERROR("failed to get fences for buffer\n");
-		goto cleanup;
+		goto unpin;
 	}
 
 	amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
@@ -275,9 +273,11 @@ pflip_cleanup:
 		DRM_ERROR("failed to reserve new rbo in error path\n");
 		goto cleanup;
 	}
+unpin:
 	if (unlikely(amdgpu_bo_unpin(new_rbo) != 0)) {
 		DRM_ERROR("failed to unpin new rbo in error path\n");
 	}
+unreserve:
 	amdgpu_bo_unreserve(new_rbo);
 
 cleanup:
@@ -516,9 +516,7 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
-	if (amdgpu_fb->obj) {
-		drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
-	}
+	drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(amdgpu_fb);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f888c015f76c..9aa533cf4ad1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -52,9 +52,10 @@
  * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
  * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
  *           at the end of IBs.
+ * - 3.3.0 - Add VM support for UVD on supported hardware.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	2
+#define KMS_DRIVER_MINOR	3
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -82,8 +83,12 @@ int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
+int amdgpu_powercontainment = 1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_cg_mask = 0xffffffff;
+unsigned amdgpu_pg_mask = 0xffffffff;
+char *amdgpu_disable_cu = NULL;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -160,6 +165,9 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
+
+MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
+module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
 #endif
 
 MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
@@ -168,6 +176,15 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
 MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
 module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
 
+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
@@ -413,7 +430,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (amdgpu_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -430,7 +450,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (amdgpu_is_atpx_hybrid() ||
+	    !amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
@@ -515,7 +537,7 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
 	.dev_priv_size = 0,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
@@ -590,7 +612,6 @@ static int __init amdgpu_init(void)
 	DRM_INFO("amdgpu kernel modesetting enabled.\n");
 	driver = &kms_driver;
 	pdriver = &amdgpu_kms_pci_driver;
-	driver->driver_features |= DRIVER_MODESET;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 	/* let modprobe override vga console setting */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d1558768cfb7..0b109aebfec6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -204,16 +204,25 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
-	while (last_seq != seq) {
+	if (unlikely(seq == last_seq))
+		return;
+
+	last_seq &= drv->num_fences_mask;
+	seq &= drv->num_fences_mask;
+
+	do {
 		struct fence *fence, **ptr;
 
-		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+		++last_seq;
+		last_seq &= drv->num_fences_mask;
+		ptr = &drv->fences[last_seq];
 
 		/* There is always exactly one thread signaling this fence slot */
 		fence = rcu_dereference_protected(*ptr, 1);
 		RCU_INIT_POINTER(*ptr, NULL);
 
-		BUG_ON(!fence);
+		if (!fence)
+			continue;
 
 		r = fence_signal(fence);
 		if (!r)
@@ -222,7 +231,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 			BUG();
 
 		fence_put(fence);
-	}
+	} while (last_seq != seq);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8fab6486064f..88fbed2389c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -503,7 +503,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;
 
-	amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 9f95da4f0536..a074edd95c70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -70,3 +70,47 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
 		}
 	}
 }
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+{
+	unsigned se, sh, cu;
+	const char *p;
+
+	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+		return;
+
+	p = amdgpu_disable_cu;
+	for (;;) {
+		char *next;
+		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+		if (ret < 3) {
+			DRM_ERROR("amdgpu: could not parse disable_cu\n");
+			return;
+		}
+
+		if (se < max_se && sh < max_sh && cu < 16) {
+			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+			mask[se * max_sh + sh] |= 1u << cu;
+		} else {
+			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+				  se, sh, cu);
+		}
+
+		next = strchr(p, ',');
+		if (!next)
+			break;
+		p = next + 1;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index dc06cbda7be6..51321e154c09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -27,4 +27,6 @@
 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
 
+unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e35423b78e..a31d7ef3032c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -33,6 +33,8 @@
 #include "amdgpu.h"
 #include "atom.h"
 
+#define AMDGPU_IB_TEST_TIMEOUT	msecs_to_jiffies(1000)
+
 /*
  * IB
  * IBs (Indirect Buffers) and areas of GPU accessible memory where
@@ -122,7 +124,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
-	struct fence *hwf;
 	uint64_t ctx;
 
 	unsigned i;
@@ -160,10 +161,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
 	if (vm) {
-		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
-				    job->gds_base, job->gds_size,
-				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size);
+		r = amdgpu_vm_flush(ring, job);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -193,7 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_invalidate)
 		amdgpu_ring_emit_hdp_invalidate(ring);
 
-	r = amdgpu_fence_emit(ring, &hwf);
+	r = amdgpu_fence_emit(ring, f);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
@@ -203,17 +201,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}
 
 	/* wrap the last IB with fence */
-	if (job && job->uf_bo) {
-		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
-
-		addr += job->uf_offset;
-		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	if (f)
-		*f = fence_get(hwf);
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -296,7 +288,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring || !ring->ready)
 			continue;
 
-		r = amdgpu_ring_test_ib(ring);
+		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
 		if (r) {
 			ring->ready = false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 835a3fa8d8df..278708f5a744 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -383,6 +383,18 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
 	return r;
 }
 
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+	int i, j;
+	for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
+		struct amdgpu_irq_src *src = adev->irq.sources[i];
+		if (!src)
+			continue;
+		for (j = 0; j < src->num_types; j++)
+			amdgpu_irq_update(adev, src, j);
+	}
+}
+
 /**
  * amdgpu_irq_get - enable interrupt
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e124b59f39c1..7ef09352e534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -94,6 +94,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
 bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 			unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
 
 int amdgpu_irq_add_domain(struct amdgpu_device *adev);
 void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index f0dafa514fe4..6674d40eb3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,21 +28,15 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_free_handler(struct work_struct *ws)
+static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 {
-	struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job);
-	amd_sched_job_put(&job->base);
-}
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-void amdgpu_job_timeout_func(struct work_struct *work)
-{
-	struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
 	DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
-				job->base.sched->name,
-				(uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
-				job->ring->fence_drv.sync_seq);
-
-	amd_sched_job_put(&job->base);
+		  job->base.sched->name,
+		  atomic_read(&job->ring->fence_drv.last_seq),
+		  job->ring->fence_drv.sync_seq);
+	amdgpu_gpu_reset(job->adev);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -63,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
-	INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -86,27 +79,33 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 	return r;
 }
 
-void amdgpu_job_free(struct amdgpu_job *job)
+void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	unsigned i;
 	struct fence *f;
+	unsigned i;
+
 	/* use sched fence if available */
-	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+	f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
 
 	for (i = 0; i < job->num_ibs; ++i)
-		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
-	fence_put(job->fence);
+		amdgpu_ib_free(job->adev, &job->ibs[i], f);
+}
 
-	amdgpu_bo_unref(&job->uf_bo);
-	amdgpu_sync_free(&job->sync);
+void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+{
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-	if (!job->base.use_sched)
-		kfree(job);
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
 }
 
-void amdgpu_job_free_func(struct kref *refcount)
+void amdgpu_job_free(struct amdgpu_job *job)
 {
-	struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount);
+	amdgpu_job_free_resources(job);
+
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
 
@@ -114,22 +113,20 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f)
 {
-	struct fence *fence;
 	int r;
 	job->ring = ring;
 
 	if (!f)
 		return -EINVAL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func, owner, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
 	if (r)
 		return r;
 
 	job->owner = owner;
 	job->ctx = entity->fence_context;
-	*f = fence_get(fence);
+	*f = fence_get(&job->base.s_fence->finished);
+	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
@@ -147,8 +144,8 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		int r;
 
 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-				      &job->base.s_fence->base,
-				      &job->vm_id, &job->vm_pd_addr);
+				      &job->base.s_fence->finished,
+				      job);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);
 
@@ -170,29 +167,24 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 	job = to_amdgpu_job(sched_job);
 
-	r = amdgpu_sync_wait(&job->sync);
-	if (r) {
-		DRM_ERROR("failed to sync wait (%d)\n", r);
-		return NULL;
-	}
+	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
 			       job->sync.last_vm_update, job, &fence);
-	if (r) {
+	if (r)
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
-		goto err;
-	}
 
-err:
-	job->fence = fence;
-	amdgpu_job_free(job);
+	/* if gpu reset, hw fence will be replaced here */
+	fence_put(job->fence);
+	job->fence = fence_get(fence);
+	amdgpu_job_free_resources(job);
 	return fence;
 }
 
 const struct amd_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
-	.begin_job = amd_sched_job_begin,
-	.finish_job = amd_sched_job_finish,
+	.timedout_job = amdgpu_job_timedout,
+	.free_job = amdgpu_job_free_cb
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 40a23704a981..d942654a1de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -60,7 +60,10 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
 	if (adev->rmmio == NULL)
 		goto done_free;
 
-	pm_runtime_get_sync(dev->dev);
+	if (amdgpu_device_is_px(dev)) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
 
 	amdgpu_amdkfd_device_fini(adev);
 
@@ -135,13 +138,75 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	}
 
 out:
-	if (r)
+	if (r) {
+		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
+		if (adev->rmmio && amdgpu_device_is_px(dev))
+			pm_runtime_put_noidle(dev->dev);
 		amdgpu_driver_unload_kms(dev);
-
+	}
 
 	return r;
 }
 
+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+				struct drm_amdgpu_query_fw *query_fw,
+				struct amdgpu_device *adev)
+{
+	switch (query_fw->fw_type) {
+	case AMDGPU_INFO_FW_VCE:
+		fw_info->ver = adev->vce.fw_version;
+		fw_info->feature = adev->vce.fb_version;
+		break;
+	case AMDGPU_INFO_FW_UVD:
+		fw_info->ver = adev->uvd.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GMC:
+		fw_info->ver = adev->mc.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GFX_ME:
+		fw_info->ver = adev->gfx.me_fw_version;
+		fw_info->feature = adev->gfx.me_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_PFP:
+		fw_info->ver = adev->gfx.pfp_fw_version;
+		fw_info->feature = adev->gfx.pfp_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_CE:
+		fw_info->ver = adev->gfx.ce_fw_version;
+		fw_info->feature = adev->gfx.ce_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC:
+		fw_info->ver = adev->gfx.rlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_MEC:
+		if (query_fw->index == 0) {
+			fw_info->ver = adev->gfx.mec_fw_version;
+			fw_info->feature = adev->gfx.mec_feature_version;
+		} else if (query_fw->index == 1) {
+			fw_info->ver = adev->gfx.mec2_fw_version;
+			fw_info->feature = adev->gfx.mec2_feature_version;
+		} else
+			return -EINVAL;
+		break;
+	case AMDGPU_INFO_FW_SMC:
+		fw_info->ver = adev->pm.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_SDMA:
+		if (query_fw->index >= adev->sdma.num_instances)
+			return -EINVAL;
+		fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+		fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
  * Userspace get information ioctl
  */
@@ -288,67 +353,20 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
 	}
 	case AMDGPU_INFO_TIMESTAMP:
-		ui64 = amdgpu_asic_get_gpu_clock_counter(adev);
+		ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_FW_VERSION: {
 		struct drm_amdgpu_info_firmware fw_info;
+		int ret;
 
 		/* We only support one instance of each IP block right now. */
 		if (info->query_fw.ip_instance != 0)
 			return -EINVAL;
 
-		switch (info->query_fw.fw_type) {
-		case AMDGPU_INFO_FW_VCE:
-			fw_info.ver = adev->vce.fw_version;
-			fw_info.feature = adev->vce.fb_version;
-			break;
-		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = adev->uvd.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GMC:
-			fw_info.ver = adev->mc.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GFX_ME:
-			fw_info.ver = adev->gfx.me_fw_version;
-			fw_info.feature = adev->gfx.me_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_PFP:
-			fw_info.ver = adev->gfx.pfp_fw_version;
-			fw_info.feature = adev->gfx.pfp_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_CE:
-			fw_info.ver = adev->gfx.ce_fw_version;
-			fw_info.feature = adev->gfx.ce_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_RLC:
-			fw_info.ver = adev->gfx.rlc_fw_version;
-			fw_info.feature = adev->gfx.rlc_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_MEC:
-			if (info->query_fw.index == 0) {
-				fw_info.ver = adev->gfx.mec_fw_version;
-				fw_info.feature = adev->gfx.mec_feature_version;
-			} else if (info->query_fw.index == 1) {
-				fw_info.ver = adev->gfx.mec2_fw_version;
-				fw_info.feature = adev->gfx.mec2_feature_version;
-			} else
-				return -EINVAL;
-			break;
-		case AMDGPU_INFO_FW_SMC:
-			fw_info.ver = adev->pm.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_SDMA:
-			if (info->query_fw.index >= adev->sdma.num_instances)
-				return -EINVAL;
-			fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
-			fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
-			break;
-		default:
-			return -EINVAL;
-		}
+		ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+		if (ret)
+			return ret;
+
 		return copy_to_user(out, &fw_info,
 				    min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
 	}
@@ -447,7 +465,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+			adev->gfx.config.max_shader_engines;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
@@ -565,6 +584,9 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 
+	amdgpu_uvd_free_handles(adev, file_priv);
+	amdgpu_vce_free_handles(adev, file_priv);
+
 	amdgpu_vm_fini(adev, &fpriv->vm);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -589,10 +611,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 void amdgpu_driver_preclose_kms(struct drm_device *dev,
 				struct drm_file *file_priv)
 {
-	struct amdgpu_device *adev = dev->dev_private;
-
-	amdgpu_uvd_free_handles(adev, file_priv);
-	amdgpu_vce_free_handles(adev, file_priv);
 }
 
 /*
@@ -755,3 +773,130 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_amdgpu_info_firmware fw_info;
+	struct drm_amdgpu_query_fw query_fw;
+	int ret, i;
+
+	/* VCE */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* UVD */
+	query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* GMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* ME */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* PFP */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* CE */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+	query_fw.index = 0;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC2 */
+	if (adev->asic_type == CHIP_KAVERI ||
+	    (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
+		query_fw.index = 1;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+			   fw_info.feature, fw_info.ver);
+	}
+
+	/* SMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* SDMA */
+	query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		query_fw.index = i;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+			   i, fw_info.feature, fw_info.ver);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_firmware_info_list[] = {
+	{"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
+};
+#endif
+
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
+					ARRAY_SIZE(amdgpu_firmware_info_list));
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7ecea83ce453..6f0873c75a25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -589,6 +589,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_bo *rbo;
+	struct ttm_mem_reg *old_mem = &bo->mem;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
@@ -602,6 +603,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	/* move_notify is called before move happens */
 	amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);
+
+	trace_amdgpu_ttm_bo_move(rbo, new_mem->mem_type, old_mem->mem_type);
 }
 
 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 589b36e8c5cf..ff63b88b0ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type state = 0;
-	long idx;
+	unsigned long idx;
 	int ret;
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else {
-		ret = kstrtol(buf, 0, &idx);
+	else if (adev->pp_enabled) {
+		struct pp_states_info data;
 
-		if (ret) {
+		ret = kstrtoul(buf, 0, &idx);
+		if (ret || idx >= ARRAY_SIZE(data.states)) {
 			count = -EINVAL;
 			goto fail;
 		}
 
-		if (adev->pp_enabled) {
-			struct pp_states_info data;
-			amdgpu_dpm_get_pp_num_states(adev, &data);
-			state = data.states[idx];
-			/* only set user selected power states */
-			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
-				state != POWER_STATE_TYPE_DEFAULT) {
-				amdgpu_dpm_dispatch_task(adev,
-						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
-				adev->pp_force_state_enabled = true;
-			}
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+		state = data.states[idx];
+		/* only set user selected power states */
+		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+		    state != POWER_STATE_TYPE_DEFAULT) {
+			amdgpu_dpm_dispatch_task(adev,
+					AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+			adev->pp_force_state_enabled = true;
 		}
 	}
 fail:
@@ -349,6 +347,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf);
 
 	return size;
 }
@@ -365,7 +365,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -379,6 +381,8 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
 fail:
 	return count;
 }
@@ -393,6 +397,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);
 
 	return size;
 }
@@ -409,7 +415,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -423,6 +431,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
 fail:
 	return count;
 }
@@ -437,6 +447,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);
 
 	return size;
 }
@@ -453,7 +465,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -467,6 +481,100 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_sclk_od(adev);
+	else if (adev->pm.funcs->get_sclk_od)
+		value = adev->pm.funcs->get_sclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_sclk_od) {
+		adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_mclk_od(adev);
+	else if (adev->pm.funcs->get_mclk_od)
+		value = adev->pm.funcs->get_mclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_mclk_od) {
+		adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
 fail:
 	return count;
 }
@@ -492,6 +600,12 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_dpm_pcie,
 		amdgpu_set_pp_dpm_pcie);
+static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_sclk_od,
+		amdgpu_set_pp_sclk_od);
+static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_mclk_od,
+		amdgpu_set_pp_mclk_od);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -1110,22 +1224,34 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 			DRM_ERROR("failed to create device file pp_table\n");
 			return ret;
 		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_sclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_mclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_pcie\n");
-			return ret;
-		}
 	}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_sclk_od\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_mclk_od\n");
+		return ret;
+	}
+
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1148,10 +1274,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
 		device_remove_file(adev->dev, &dev_attr_pp_force_state);
 		device_remove_file(adev->dev, &dev_attr_pp_table);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
 	}
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
+	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 }
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 6bd961fb43dc..c5738a22b690 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -52,6 +52,7 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
 		pp_init->chip_family = adev->family;
 		pp_init->chip_id = adev->asic_type;
 		pp_init->device = amdgpu_cgs_create_device(adev);
+		pp_init->powercontainment_enabled = amdgpu_powercontainment;
 
 		ret = amd_powerplay_init(pp_init, amd_pp);
 		kfree(pp_init);
@@ -183,13 +184,6 @@ static int amdgpu_pp_sw_fini(void *handle)
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
-		amdgpu_pm_sysfs_fini(adev);
-		amd_powerplay_fini(adev->powerplay.pp_handle);
-	}
-#endif
-
 	return ret;
 }
 
@@ -223,6 +217,22 @@ static int amdgpu_pp_hw_fini(void *handle)
 	return ret;
 }
 
+static void amdgpu_pp_late_fini(void *handle)
+{
+#ifdef CONFIG_DRM_AMD_POWERPLAY
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->pp_enabled) {
+		amdgpu_pm_sysfs_fini(adev);
+		amd_powerplay_fini(adev->powerplay.pp_handle);
+	}
+
+	if (adev->powerplay.ip_funcs->late_fini)
+		adev->powerplay.ip_funcs->late_fini(
+			  adev->powerplay.pp_handle);
+#endif
+}
+
 static int amdgpu_pp_suspend(void *handle)
 {
 	int ret = 0;
@@ -311,6 +321,7 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
 	.sw_fini = amdgpu_pp_sw_fini,
 	.hw_init = amdgpu_pp_hw_init,
 	.hw_fini = amdgpu_pp_hw_fini,
+	.late_fini = amdgpu_pp_late_fini,
 	.suspend = amdgpu_pp_suspend,
 	.resume = amdgpu_pp_resume,
 	.is_idle = amdgpu_pp_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 3b02272db678..85aeb0a804bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -28,6 +28,7 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -48,6 +49,7 @@
  */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
 
 /**
  * amdgpu_ring_alloc - allocate space on the ring buffer
@@ -73,6 +75,10 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 
 	ring->count_dw = ndw;
 	ring->wptr_old = ring->wptr;
+
+	if (ring->funcs->begin_use)
+		ring->funcs->begin_use(ring);
+
 	return 0;
 }
 
@@ -125,6 +131,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 
 	mb();
 	amdgpu_ring_set_wptr(ring);
+
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }
 
 /**
@@ -137,78 +146,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 void amdgpu_ring_undo(struct amdgpu_ring *ring)
 {
 	ring->wptr = ring->wptr_old;
-}
-
-/**
- * amdgpu_ring_backup - Back up the content of a ring
- *
- * @ring: the ring we want to back up
- *
- * Saves all unprocessed commits from a ring, returns the number of dwords saved.
- */
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data)
-{
-	unsigned size, ptr, i;
-
-	*data = NULL;
-
-	if (ring->ring_obj == NULL)
-		return 0;
-
-	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring))
-		return 0;
-
-	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	size = ring->wptr + (ring->ring_size / 4);
-	size -= ptr;
-	size &= ring->ptr_mask;
-	if (size == 0)
-		return 0;
-
-	/* and then save the content of the ring */
-	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data)
-		return 0;
-	for (i = 0; i < size; ++i) {
-		(*data)[i] = ring->ring[ptr++];
-		ptr &= ring->ptr_mask;
-	}
-
-	return size;
-}
-
-/**
- * amdgpu_ring_restore - append saved commands to the ring again
- *
- * @ring: ring to append commands to
- * @size: number of dwords we want to write
- * @data: saved commands
- *
- * Allocates space on the ring and restore the previously saved commands.
- */
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data)
-{
-	int i, r;
-
-	if (!size || !data)
-		return 0;
-
-	/* restore the saved ring content */
-	r = amdgpu_ring_alloc(ring, size);
-	if (r)
-		return r;
-
-	for (i = 0; i < size; ++i) {
-		amdgpu_ring_write(ring, data[i]);
-	}
 
-	amdgpu_ring_commit(ring);
-	kfree(data);
-	return 0;
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }
 
 /**
@@ -260,14 +200,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
-	if (r) {
-		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
-		return r;
-	}
-	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
-	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
-
 	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
@@ -276,7 +208,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
 	ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
 
-	spin_lock_init(&ring->fence_lock);
 	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
 	if (r) {
 		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
@@ -310,6 +241,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 		r = amdgpu_bo_kmap(ring->ring_obj,
 				       (void **)&ring->ring);
+
+		memset((void *)ring->ring, 0, ring->ring_size);
+
 		amdgpu_bo_unreserve(ring->ring_obj);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring map failed\n", r);
@@ -343,10 +277,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	ring->ring = NULL;
 	ring->ring_obj = NULL;
 
+	amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
 	amdgpu_wb_free(ring->adev, ring->wptr_offs);
-	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);
 
 	if (ring_obj) {
 		r = amdgpu_bo_reserve(ring_obj, false);
@@ -357,6 +291,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		}
 		amdgpu_bo_unref(&ring_obj);
 	}
+	amdgpu_debugfs_ring_fini(ring);
 }
 
 /*
@@ -364,57 +299,62 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
  */
 #if defined(CONFIG_DEBUG_FS)
 
-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
+/* Layout of file is 12 bytes consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n-words of ring data
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	int roffset = (unsigned long)node->info_ent->data;
-	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
-	uint32_t rptr, wptr, rptr_next;
-	unsigned i;
-
-	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
-
-	rptr = amdgpu_ring_get_rptr(ring);
-	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
-
-	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
-		   ring->wptr, ring->wptr);
-
-	if (!ring->ready)
-		return 0;
-
-	/* print 8 dw before current rptr as often it's the last executed
-	 * packet that is the root issue
-	 */
-	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	while (i != rptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+	struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
+	int r, i;
+	uint32_t value, result, early[3];
+
+	if (*pos & 3 || size & 3)
+		return -EINVAL;
+
+	result = 0;
+
+	if (*pos < 12) {
+		early[0] = amdgpu_ring_get_rptr(ring);
+		early[1] = amdgpu_ring_get_wptr(ring);
+		early[2] = ring->wptr;
+		for (i = *pos / 4; i < 3 && size; i++) {
+			r = put_user(early[i], (uint32_t *)buf);
+			if (r)
+				return r;
+			buf += 4;
+			result += 4;
+			size -= 4;
+			*pos += 4;
+		}
 	}
-	while (i != wptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+
+	while (size) {
+		if (*pos >= (ring->ring_size + 12))
+			return result;
+			
+		value = ring->ring[(*pos - 12)/4];
+		r = put_user(value, (uint32_t*)buf);
+		if (r)
+			return r;
+		buf += 4;
+		result += 4;
+		size -= 4;
+		*pos += 4;
 	}
-	return 0;
+
+	return result;
 }
 
-static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS];
-static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_ring_read,
+	.llseek = default_llseek
+};
 
 #endif
 
@@ -422,28 +362,27 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring)
 {
 #if defined(CONFIG_DEBUG_FS)
-	unsigned offset = (uint8_t*)ring - (uint8_t*)adev;
-	unsigned i;
-	struct drm_info_list *info;
-	char *name;
-
-	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
-		info = &amdgpu_debugfs_ring_info_list[i];
-		if (!info->data)
-			break;
-	}
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *ent, *root = minor->debugfs_root;
+	char name[32];
 
-	if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list))
-		return -ENOSPC;
-
-	name = &amdgpu_debugfs_ring_names[i][0];
 	sprintf(name, "amdgpu_ring_%s", ring->name);
-	info->name = name;
-	info->show = amdgpu_debugfs_ring_info;
-	info->driver_features = 0;
-	info->data = (void*)(uintptr_t)offset;
 
-	return amdgpu_debugfs_add_files(adev, info, 1);
+	ent = debugfs_create_file(name,
+				  S_IFREG | S_IRUGO, root,
+				  ring, &amdgpu_debugfs_ring_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	i_size_write(ent->d_inode, ring->ring_size + 12);
+	ring->ent = ent;
 #endif
 	return 0;
 }
+
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove(ring->ent);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index b16366c2b4a0..d8af37a845f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 		return r;
 	}
 	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
+	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
 	amdgpu_bo_unreserve(sa_manager->bo);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 34a92808bbd4..5c8d3022fb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -223,13 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
  * @sync: the sync object
+ * @ring: optional ring to use for test
  *
- * Returns true if all fences in the sync object are signaled.
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
  */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -237,6 +240,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		struct fence *f = e->fence;
+		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+		if (ring && s_fence) {
+			/* For fences from the same ring it is sufficient
+			 * when they are scheduled.
+			 */
+			if (s_fence->sched == &ring->sched) {
+				if (fence_is_signaled(&s_fence->scheduled))
+					continue;
+
+				return &s_fence->scheduled;
+			}
+		}
 
 		if (fence_is_signaled(f)) {
 			hash_del(&e->node);
@@ -245,58 +261,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 			continue;
 		}
 
-		return false;
+		return f;
 	}
 
-	return true;
+	return NULL;
 }
 
 /**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
+ * amdgpu_sync_get_fence - get the next fence from the sync object
  *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
+ * @sync: sync object to use
  *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
+ * Get and removes the next fence from the sync object not signaled yet.
  */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence)
-{
-	struct amdgpu_sync_entry *e, *newone;
-	struct hlist_node *tmp;
-	int i;
-
-	/* Allocate the new entry before moving the old ones */
-	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-	if (!newone)
-		return -ENOMEM;
-
-	hash_for_each_safe(src->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
-
-		hash_del(&e->node);
-		if (fence_is_signaled(f)) {
-			fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		if (amdgpu_sync_add_later(dst, f)) {
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		hash_add(dst->fences, &e->node, f->context);
-	}
-
-	hash_add(src->fences, &newone->node, fence->context);
-	newone->fence = fence_get(fence);
-
-	return 0;
-}
-
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
@@ -319,25 +296,6 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 }
 
-int amdgpu_sync_wait(struct amdgpu_sync *sync)
-{
-	struct amdgpu_sync_entry *e;
-	struct hlist_node *tmp;
-	int i, r;
-
-	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		r = fence_wait(e->fence, false);
-		if (r)
-			return r;
-
-		hash_del(&e->node);
-		fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
-	}
-
-	return 0;
-}
-
 /**
  * amdgpu_sync_free - free the sync object
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 26a5f4acf584..0d8d65eb46cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -11,19 +11,68 @@
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace
 
+TRACE_EVENT(amdgpu_mm_rreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_mm_wreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
 TRACE_EVENT(amdgpu_bo_create,
 	    TP_PROTO(struct amdgpu_bo *bo),
 	    TP_ARGS(bo),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo *, bo)
 			     __field(u32, pages)
+			     __field(u32, type)
+			     __field(u32, prefer)
+			     __field(u32, allow)
+			     __field(u32, visible)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->bo = bo;
 			   __entry->pages = bo->tbo.num_pages;
+			   __entry->type = bo->tbo.mem.mem_type;
+			   __entry->prefer = bo->prefered_domains;
+			   __entry->allow = bo->allowed_domains;
+			   __entry->visible = bo->flags;
 			   ),
-	    TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages)
+
+	    TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+		       __entry->bo, __entry->pages, __entry->type,
+		       __entry->prefer, __entry->allow, __entry->visible)
 );
 
 TRACE_EVENT(amdgpu_cs,
@@ -64,7 +113,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -89,7 +138,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -100,24 +149,26 @@ TRACE_EVENT(amdgpu_sched_run_job,
 
 
 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
-		     uint64_t pd_addr),
-	    TP_ARGS(vm, ring, vmid, pd_addr),
+	    TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job),
+	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
 			     __field(u32, vmid)
 			     __field(u64, pd_addr)
+			     __field(u32, needs_flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->vm = vm;
 			   __entry->ring = ring;
-			   __entry->vmid = vmid;
-			   __entry->pd_addr = pd_addr;
+			   __entry->vmid = job->vm_id;
+			   __entry->pd_addr = job->vm_pd_addr;
+			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
-		      __entry->ring, __entry->vmid, __entry->pd_addr)
+	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->vm, __entry->ring, __entry->vmid,
+		      __entry->pd_addr, __entry->needs_flush)
 );
 
 TRACE_EVENT(amdgpu_vm_bo_map,
@@ -244,13 +295,55 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo_list *, list)
 			     __field(struct amdgpu_bo *, bo)
+			     __field(u64, bo_size)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->list = list;
 			   __entry->bo = bo;
+			   __entry->bo_size = amdgpu_bo_size(bo);
 			   ),
-	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+	    TP_printk("list=%p, bo=%p, bo_size = %Ld",
+		      __entry->list,
+		      __entry->bo,
+		      __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_cs_bo_status,
+	    TP_PROTO(uint64_t total_bo, uint64_t total_size),
+	    TP_ARGS(total_bo, total_size),
+	    TP_STRUCT__entry(
+			__field(u64, total_bo)
+			__field(u64, total_size)
+			),
+
+	    TP_fast_assign(
+			__entry->total_bo = total_bo;
+			__entry->total_size = total_size;
+			),
+	    TP_printk("total bo size = %Ld, total bo count = %Ld",
+			__entry->total_bo, __entry->total_size)
+);
+
+TRACE_EVENT(amdgpu_ttm_bo_move,
+	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+	    TP_ARGS(bo, new_placement, old_placement),
+	    TP_STRUCT__entry(
+			__field(struct amdgpu_bo *, bo)
+			__field(u64, bo_size)
+			__field(u32, new_placement)
+			__field(u32, old_placement)
+			),
+
+	    TP_fast_assign(
+			__entry->bo      = bo;
+			__entry->bo_size = amdgpu_bo_size(bo);
+			__entry->new_placement = new_placement;
+			__entry->old_placement = old_placement;
+			),
+	    TP_printk("bo=%p from:%d to %d with size = %Ld",
+			__entry->bo, __entry->old_placement,
+			__entry->new_placement, __entry->bo_size)
 );
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b9053af4762..b7742e62972a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
 			       new_mem->num_pages * PAGE_SIZE, /* bytes */
 			       bo->resv, &fence);
-	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, fence,
-				      evict, no_wait_gpu, new_mem);
+	if (r)
+		return r;
+
+	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	fence_put(fence);
 	return r;
 }
@@ -396,6 +397,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 		return -EINVAL;
 
 	adev = amdgpu_get_adev(bo->bdev);
+
+	/* remember the eviction */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
 		return 0;
@@ -429,7 +435,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 01abfc21b4a2..b11f4e8868d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -40,9 +40,16 @@
 #include "uvd/uvd_4_2_d.h"
 
 /* 1 second timeout */
-#define UVD_IDLE_TIMEOUT_MS	1000
+#define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+/* Firmware versions for VI */
+#define FW_1_65_10	((1 << 24) | (65 << 16) | (10 << 8))
+#define FW_1_87_11	((1 << 24) | (87 << 16) | (11 << 8))
+#define FW_1_87_12	((1 << 24) | (87 << 16) | (12 << 8))
+#define FW_1_37_15	((1 << 24) | (37 << 16) | (15 << 8))
+
 /* Polaris10/11 firmware version */
-#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+#define FW_1_66_16	((1 << 24) | (66 << 16) | (16 << 8))
 
 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -92,7 +99,6 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 
-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
@@ -246,6 +252,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;
 
+	switch (adev->asic_type) {
+	case CHIP_TONGA:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
+		break;
+	case CHIP_CARRIZO:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
+		break;
+	case CHIP_FIJI:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
+		break;
+	case CHIP_STONEY:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
+		break;
+	default:
+		adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
+	}
+
 	return 0;
 }
 
@@ -253,19 +276,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return 0;
+	kfree(adev->uvd.saved_bo);
 
 	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
 
-	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
-	if (!r) {
-		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
-		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
-		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
-	}
+	if (adev->uvd.vcpu_bo) {
+		r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
+		if (!r) {
+			amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
+			amdgpu_bo_unpin(adev->uvd.vcpu_bo);
+			amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
+		}
 
-	amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+	}
 
 	amdgpu_ring_fini(&adev->uvd.ring);
 
@@ -345,8 +369,6 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 		if (handle != 0 && adev->uvd.filp[i] == filp) {
 			struct fence *fence;
 
-			amdgpu_uvd_note_usage(adev);
-
 			r = amdgpu_uvd_get_destroy_msg(ring, handle,
 						       false, &fence);
 			if (r) {
@@ -437,7 +459,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 	unsigned fs_in_mb = width_in_mb * height_in_mb;
 
 	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
-	unsigned min_ctx_size = 0;
+	unsigned min_ctx_size = ~0;
 
 	image_size = width * height;
 	image_size += image_size / 2;
@@ -556,7 +578,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 		/* reference picture buffer */
 		min_dpb_size = image_size * num_dpb_buffer;
 
-		if (adev->asic_type < CHIP_POLARIS10){
+		if (!adev->uvd.use_ctx_buf){
 			/* macroblock context buffer */
 			min_dpb_size +=
 				width_in_mb * height_in_mb * num_dpb_buffer * 192;
@@ -661,7 +683,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 		}
 
 		DRM_ERROR("No more free UVD handles!\n");
-		return -EINVAL;
+		return -ENOSPC;
 
 	case 1:
 		/* it's a decode msg, calc buffer sizes */
@@ -912,8 +934,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		return -EINVAL;
 	}
 
-	amdgpu_uvd_note_usage(ctx.parser->adev);
-
 	return 0;
 }
 
@@ -967,7 +987,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err_free;
 
@@ -1109,16 +1129,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->uvd.idle_work,
-				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 	}
 }
 
-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
 	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
-					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
 
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
@@ -1128,3 +1146,48 @@ static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
 		}
 	}
 }
+
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_uvd_ring_test_ib - test ib execution
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully execute an IB
+ */
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct fence *fence;
+	long r;
+
+	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		goto error;
+	}
+
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		goto error;
+	}
+
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	} else {
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+		r = 0;
+	}
+
+error:
+	fence_put(fence);
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 9a3b449081a7..c850009602d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -35,5 +35,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 875626a2eccb..05865ce35351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -36,7 +36,7 @@
 #include "cikd.h"
 
 /* 1 second timeout */
-#define VCE_IDLE_TIMEOUT_MS	1000
+#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -85,8 +85,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	unsigned ucode_version, version_major, version_minor, binary_id;
 	int i, r;
 
-	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
-
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_BONAIRE:
@@ -197,6 +195,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 		adev->vce.filp[i] = NULL;
 	}
 
+	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
+	mutex_init(&adev->vce.idle_mutex);
+
 	return 0;
 }
 
@@ -220,6 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	amdgpu_ring_fini(&adev->vce.ring[1]);
 
 	release_firmware(adev->vce.fw);
+	mutex_destroy(&adev->vce.idle_mutex);
 
 	return 0;
 }
@@ -310,37 +312,44 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->vce.idle_work,
-				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 	}
 }
 
 /**
- * amdgpu_vce_note_usage - power up VCE
+ * amdgpu_vce_ring_begin_use - power up VCE
  *
- * @adev: amdgpu_device pointer
+ * @ring: amdgpu ring
  *
  * Make sure VCE is powerd up when we want to use it
  */
-static void amdgpu_vce_note_usage(struct amdgpu_device *adev)
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
 {
-	bool streams_changed = false;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->vce.idle_work,
-					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
-
-	if (adev->pm.dpm_enabled) {
-		/* XXX figure out if the streams changed */
-		streams_changed = false;
-	}
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks;
 
-	if (set_clocks || streams_changed) {
+	mutex_lock(&adev->vce.idle_mutex);
+	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
+	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_vce(adev, true);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
 		}
 	}
+	mutex_unlock(&adev->vce.idle_mutex);
+}
+
+/**
+ * amdgpu_vce_ring_end_use - power VCE down
+ *
+ * @ring: amdgpu ring
+ *
+ * Schedule work to power VCE down again
+ */
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 }
 
 /**
@@ -357,11 +366,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	int i, r;
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		uint32_t handle = atomic_read(&adev->vce.handles[i]);
+
 		if (!handle || adev->vce.filp[i] != filp)
 			continue;
 
-		amdgpu_vce_note_usage(adev);
-
 		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
 		if (r)
 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
@@ -437,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;
 
 	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-	job->fence = f;
+	job->fence = fence_get(f);
 	if (r)
 		goto err;
 
@@ -469,7 +477,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
-	uint64_t dummy;
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -477,7 +484,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 		return r;
 
 	ib = &job->ibs[0];
-	dummy = ib->gpu_addr + 1024;
 
 	/* stitch together an VCE destroy msg */
 	ib->length_dw = 0;
@@ -485,11 +491,14 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
 	ib->ptr[ib->length_dw++] = handle;
 
-	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
-	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
-	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-	ib->ptr[ib->length_dw++] = dummy;
-	ib->ptr[ib->length_dw++] = 0x00000001;
+	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
+	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
+	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
+	ib->ptr[ib->length_dw++] = 0x00000000;
 
 	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
 	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
@@ -499,7 +508,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err;
 
@@ -580,12 +589,10 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
  * we we don't have another free session index.
  */
 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
-				      uint32_t handle, bool *allocated)
+				      uint32_t handle, uint32_t *allocated)
 {
 	unsigned i;
 
-	*allocated = false;
-
 	/* validate the handle */
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
@@ -602,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
 		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
 			p->adev->vce.filp[i] = p->filp;
 			p->adev->vce.img_size[i] = 0;
-			*allocated = true;
+			*allocated |= 1 << i;
 			return i;
 		}
 	}
@@ -622,15 +629,13 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
 	unsigned fb_idx = 0, bs_idx = 0;
 	int session_idx = -1;
-	bool destroyed = false;
-	bool created = false;
-	bool allocated = false;
+	uint32_t destroyed = 0;
+	uint32_t created = 0;
+	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
 	int i, r = 0, idx = 0;
 
-	amdgpu_vce_note_usage(p->adev);
-
 	while (idx < ib->length_dw) {
 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
@@ -641,30 +646,30 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 			goto out;
 		}
 
-		if (destroyed) {
-			DRM_ERROR("No other command allowed after destroy!\n");
-			r = -EINVAL;
-			goto out;
-		}
-
 		switch (cmd) {
-		case 0x00000001: // session
+		case 0x00000001: /* session */
 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
 			session_idx = amdgpu_vce_validate_handle(p, handle,
 								 &allocated);
-			if (session_idx < 0)
-				return session_idx;
+			if (session_idx < 0) {
+				r = session_idx;
+				goto out;
+			}
 			size = &p->adev->vce.img_size[session_idx];
 			break;
 
-		case 0x00000002: // task info
+		case 0x00000002: /* task info */
 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
 			break;
 
-		case 0x01000001: // create
-			created = true;
-			if (!allocated) {
+		case 0x01000001: /* create */
+			created |= 1 << session_idx;
+			if (destroyed & (1 << session_idx)) {
+				destroyed &= ~(1 << session_idx);
+				allocated |= 1 << session_idx;
+
+			} else if (!(allocated & (1 << session_idx))) {
 				DRM_ERROR("Handle already in use!\n");
 				r = -EINVAL;
 				goto out;
@@ -675,16 +680,16 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				8 * 3 / 2;
 			break;
 
-		case 0x04000001: // config extension
-		case 0x04000002: // pic control
-		case 0x04000005: // rate control
-		case 0x04000007: // motion estimation
-		case 0x04000008: // rdo
-		case 0x04000009: // vui
-		case 0x05000002: // auxiliary buffer
+		case 0x04000001: /* config extension */
+		case 0x04000002: /* pic control */
+		case 0x04000005: /* rate control */
+		case 0x04000007: /* motion estimation */
+		case 0x04000008: /* rdo */
+		case 0x04000009: /* vui */
+		case 0x05000002: /* auxiliary buffer */
 			break;
 
-		case 0x03000001: // encode
+		case 0x03000001: /* encode */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
 						*size, 0);
 			if (r)
@@ -696,18 +701,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;
 
-		case 0x02000001: // destroy
-			destroyed = true;
+		case 0x02000001: /* destroy */
+			destroyed |= 1 << session_idx;
 			break;
 
-		case 0x05000001: // context buffer
+		case 0x05000001: /* context buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						*size * 2, 0);
 			if (r)
 				goto out;
 			break;
 
-		case 0x05000004: // video bitstream buffer
+		case 0x05000004: /* video bitstream buffer */
 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						tmp, bs_idx);
@@ -715,7 +720,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;
 
-		case 0x05000005: // feedback buffer
+		case 0x05000005: /* feedback buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						4096, fb_idx);
 			if (r)
@@ -737,21 +742,24 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 		idx += len / 4;
 	}
 
-	if (allocated && !created) {
+	if (allocated & ~created) {
 		DRM_ERROR("New session without create command!\n");
 		r = -ENOENT;
 	}
 
 out:
-	if ((!r && destroyed) || (r && allocated)) {
-		/*
-		 * IB contains a destroy msg or we have allocated an
-		 * handle and got an error, anyway free the handle
-		 */
-		for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
-			atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
+	if (!r) {
+		/* No error, free all destroyed handle slots */
+		tmp = destroyed;
+	} else {
+		/* Error during parsing, free all allocated handle slots */
+		tmp = allocated;
 	}
 
+	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+		if (tmp & (1 << i))
+			atomic_set(&p->adev->vce.handles[i], 0);
+
 	return r;
 }
 
@@ -837,10 +845,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
  * @ring: the engine to test on
  *
  */
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct fence *fence = NULL;
-	int r;
+	long r;
 
 	/* skip vce ring1 ib test for now, since it's not reliable */
 	if (ring == &ring->adev->vce.ring[1])
@@ -848,21 +856,25 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
 
 	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
 		goto error;
 	}
 
 	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
 		goto error;
 	}
 
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 	} else {
 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	}
 error:
 	fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index f40cf761c66f..63f83d0d985c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -39,6 +39,8 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring);
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9f36ed30ba11..8e642fc48df4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -114,16 +115,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
  *
+ * @adev: amdgpu device pointer
  * @vm: vm providing the BOs
  * @duplicates: head of duplicates list
  *
  * Add the page directory to the BO duplicates list
  * for command submission.
  */
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates)
 {
+	uint64_t num_evictions;
 	unsigned i;
 
+	/* We only need to validate the page tables
+	 * if they aren't already valid.
+	 */
+	num_evictions = atomic64_read(&adev->num_evictions);
+	if (num_evictions == vm->last_eviction_counter)
+		return;
+
 	/* add the vm page table to the list */
 	for (i = 0; i <= vm->max_pde_used; ++i) {
 		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
@@ -162,6 +173,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 	spin_unlock(&glob->lru_lock);
 }
 
+static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
+			      struct amdgpu_vm_id *id)
+{
+	return id->current_gpu_reset_count !=
+		atomic_read(&adev->gpu_reset_counter) ? true : false;
+}
+
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
@@ -174,18 +192,67 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr)
+		      struct amdgpu_job *job)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
+	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id;
-	unsigned i = ring->idx;
-	int r;
+	struct amdgpu_vm_id *id, *idle;
+	struct fence **fences;
+	unsigned i;
+	int r = 0;
+
+	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+			       GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
 
 	mutex_lock(&adev->vm_manager.lock);
 
+	/* Check if we have an idle VMID */
+	i = 0;
+	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
+			break;
+		++i;
+	}
+
+	/* If we can't find a idle VMID to use, wait till one becomes available */
+	if (&idle->list == &adev->vm_manager.ids_lru) {
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			fence_get(fences[j]);
+
+		array = fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+		fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
+	i = ring->idx;
 	do {
 		struct fence *flushed;
 
@@ -196,67 +263,52 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		/* Check all the prerequisites to using this VMID */
 		if (!id)
 			continue;
+		if (amdgpu_vm_is_gpu_reset(adev, id))
+			continue;
 
 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
-		if (pd_addr != id->pd_gpu_addr)
+		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (id->last_user != ring &&
-		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+		if (!id->last_flush)
 			continue;
 
-		flushed  = id->flushed_updates;
-		if (updates && (!flushed || fence_is_later(updates, flushed)))
+		if (id->last_flush->context != fence_context &&
+		    !fence_is_signaled(id->last_flush))
 			continue;
 
-		/* Good we can use this VMID */
-		if (id->last_user == ring) {
-			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->first);
-			if (r)
-				goto error;
-		}
+		flushed  = id->flushed_updates;
+		if (updates &&
+		    (!flushed || fence_is_later(updates, flushed)))
+			continue;
 
-		/* And remember this submission as user of the VMID */
+		/* Good we can use this VMID. Remember this submission as
+		 * user of the VMID.
+		 */
 		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 		if (r)
 			goto error;
 
+		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;
 
-		*vm_id = id - adev->vm_manager.ids;
-		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+		job->vm_id = id - adev->vm_manager.ids;
+		job->vm_needs_flush = false;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;
 
 	} while (i != ring->idx);
 
-	id = list_first_entry(&adev->vm_manager.ids_lru,
-			      struct amdgpu_vm_id,
-			      list);
-
-	if (!amdgpu_sync_is_idle(&id->active)) {
-		struct list_head *head = &adev->vm_manager.ids_lru;
-		struct amdgpu_vm_id *tmp;
+	/* Still no ID to use? Then use the idle one found earlier */
+	id = idle;
 
-		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
-					 list) {
-			if (amdgpu_sync_is_idle(&id->active)) {
-				list_move(&id->list, head);
-				head = &id->list;
-			}
-		}
-		id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_id,
-				      list);
-	}
-
-	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 	if (r)
 		goto error;
 
@@ -269,22 +321,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);
 
-	id->pd_gpu_addr = pd_addr;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	id->last_user = ring;
 	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
-	*vm_id = id - adev->vm_manager.ids;
-	*vm_pd_addr = pd_addr;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+	job->vm_id = id - adev->vm_manager.ids;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
 error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }
 
+static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct amdgpu_ip_block_version *ip_block;
+
+	if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+		/* only compute rings */
+		return false;
+
+	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	if (!ip_block)
+		return false;
+
+	if (ip_block->major <= 7) {
+		/* gfx7 has no workaround */
+		return true;
+	} else if (ip_block->major == 8) {
+		if (adev->gfx.mec_fw_version >= 673)
+			/* gfx8 is fixed in MEC firmware 673 */
+			return false;
+		else
+			return true;
+	}
+	return false;
+}
+
 /**
  * amdgpu_vm_flush - hardware flush the vm
  *
@@ -294,59 +370,52 @@ error:
  *
  * Emit a VM flush when it is necessary.
  */
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != gds_base ||
-		id->gds_size != gds_size ||
-		id->gws_base != gws_base ||
-		id->gws_size != gws_size ||
-		id->oa_base != oa_base ||
-		id->oa_size != oa_size);
+		id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size);
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
+	    job->vm_needs_flush || gds_switch_needed ||
+	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush &&
-	    pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+	    amdgpu_vm_is_gpu_reset(adev, id))) {
 		struct fence *fence;
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
-		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
-			r = amdgpu_fence_emit(ring, &fence);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
-			fence_put(id->last_flush);
-			id->last_flush = fence;
-		}
+		fence_put(id->last_flush);
+		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
 
 	if (gds_switch_needed) {
-		id->gds_base = gds_base;
-		id->gds_size = gds_size;
-		id->gws_base = gws_base;
-		id->gws_size = gws_size;
-		id->oa_base = oa_base;
-		id->oa_size = oa_size;
-		amdgpu_ring_emit_gds_switch(ring, vm_id,
-					    gds_base, gds_size,
-					    gws_base, gws_size,
-					    oa_base, oa_size);
+		id->gds_base = job->gds_base;
+		id->gds_size = job->gds_size;
+		id->gws_base = job->gws_base;
+		id->gws_size = job->gws_size;
+		id->oa_base = job->oa_base;
+		id->oa_size = job->oa_size;
+		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+					    job->gds_base, job->gds_size,
+					    job->gws_base, job->gws_size,
+					    job->oa_base, job->oa_size);
 	}
 
 	return 0;
@@ -723,7 +792,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
- * @dst: destination address to map to
+ * @dst: destination address to map to, the next dst inside the function
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
@@ -737,49 +806,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
-	uint64_t addr;
+	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t addr; /* next GPU address to be updated */
+	uint64_t pt_idx;
+	struct amdgpu_bo *pt;
+	unsigned nptes; /* next number of ptes to be updated */
+	uint64_t next_pe_start;
+
+	/* initialize the variables */
+	addr = start;
+	pt_idx = addr >> amdgpu_vm_block_size;
+	pt = vm->page_tables[pt_idx].entry.robj;
+
+	if ((addr & ~mask) == (end & ~mask))
+		nptes = end - addr;
+	else
+		nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+
+	cur_pe_start = amdgpu_bo_gpu_offset(pt);
+	cur_pe_start += (addr & mask) * 8;
+	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_dst = dst;
+
+	/* for next ptb*/
+	addr += nptes;
+	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 
 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
-		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
-		unsigned nptes;
-		uint64_t pe_start;
+	while (addr < end) {
+		pt_idx = addr >> amdgpu_vm_block_size;
+		pt = vm->page_tables[pt_idx].entry.robj;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pe_start = amdgpu_bo_gpu_offset(pt);
-		pe_start += (addr & mask) * 8;
-
-		if (last_pe_end != pe_start) {
+		next_pe_start = amdgpu_bo_gpu_offset(pt);
+		next_pe_start += (addr & mask) * 8;
 
+		if (cur_pe_end == next_pe_start) {
+			/* The next ptb is consecutive to current ptb.
+			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Will update two ptbs together in future.
+			*/
+			cur_pe_end += 8 * nptes;
+		} else {
 			amdgpu_vm_frag_ptes(adev, vm_update_params,
-					    last_pe_start, last_pe_end,
-					    last_dst, flags);
+					    cur_pe_start, cur_pe_end,
+					    cur_dst, flags);
 
-			last_pe_start = pe_start;
-			last_pe_end = pe_start + 8 * nptes;
-			last_dst = dst;
-		} else {
-			last_pe_end += 8 * nptes;
+			cur_pe_start = next_pe_start;
+			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_dst = dst;
 		}
 
+		/* for next ptb*/
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
-			    last_pe_end, last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
+			    cur_pe_end, cur_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -793,6 +888,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
@@ -853,6 +949,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	vm_update_params.ib = &job->ibs[0];
 
+	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+	if (r)
+		goto error_free;
+
 	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
 			     owner);
 	if (r)
@@ -889,6 +989,7 @@ error_free:
  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -902,6 +1003,7 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+				      struct fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
@@ -932,7 +1034,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	addr += mapping->offset;
 
 	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		return amdgpu_vm_bo_update_mapping(adev, exclusive,
+						   src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
 
@@ -940,7 +1043,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;
 
 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
+						src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -973,6 +1077,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
+	struct fence *exclusive;
 	uint64_t addr;
 	int r;
 
@@ -994,8 +1099,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		default:
 			break;
 		}
+
+		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 	} else {
 		addr = 0;
+		exclusive = NULL;
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
@@ -1007,7 +1115,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
+					       gtt_flags, pages_addr, vm,
 					       mapping, flags, addr,
 					       &bo_va->last_pt_update);
 		if (r)
@@ -1054,7 +1163,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
+		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
 					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
@@ -1445,6 +1554,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unreserve(vm->page_directory);
 	if (r)
 		goto error_free_page_directory;
+	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 
 	return 0;
 
@@ -1516,6 +1626,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}
 
+	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 48b6bd671cda..c32eca26155c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -98,6 +98,7 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 			if (dig->backlight_level == 0)
 				amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
 								       ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 13cdb01e9b45..bc56c8a181e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -156,3 +156,18 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)
+{
+	PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+
+	args.ucRegIndex = offset;
+	args.lpI2CDataOut = data;
+	args.ucFlag = 1;
+	args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+	args.ucTransBytes = 1;
+	args.ucSlaveAddr = slave_addr;
+	args.ucLineNumber = line_number;
+
+	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
index d6128d9de56e..251aaf41f65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
@@ -27,5 +27,7 @@
 int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
 		      struct i2c_msg *msgs, int num);
 u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap);
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev,
+		u8 slave_addr, u8 line_number, u8 offset, u8 data);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index ea407db1fbcf..e2f0e5d58d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -50,7 +50,9 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
@@ -84,12 +86,14 @@ static const struct ci_pt_defaults defaults_bonaire_xt =
 	{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 }
 };
 
+#if 0
 static const struct ci_pt_defaults defaults_bonaire_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062,
 	{ 0x8C,  0x23F, 0x244, 0xA6,  0x83,  0x85,  0x86,  0x86,  0x83,  0xDB,  0xDB,  0xDA,  0x67,  0x60,  0x5F  },
 	{ 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB }
 };
+#endif
 
 static const struct ci_pt_defaults defaults_saturn_xt =
 {
@@ -98,12 +102,14 @@ static const struct ci_pt_defaults defaults_saturn_xt =
 	{ 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 }
 };
 
+#if 0
 static const struct ci_pt_defaults defaults_saturn_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000,
 	{ 0x96,  0x21D, 0x23B, 0xA1,  0x85,  0x87,  0x83,  0x84,  0x81,  0xE6,  0xE6,  0xE6,  0x71,  0x6A,  0x6A  },
 	{ 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 }
 };
+#endif
 
 static const struct ci_pt_config_reg didt_config_ci[] =
 {
@@ -736,19 +742,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
 
 	if (pi->caps_sq_ramping || pi->caps_db_ramping ||
 	    pi->caps_td_ramping || pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = ci_program_pt_config_registers(adev, didt_config_ci);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		ci_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
@@ -3030,7 +3036,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 
 	if (pi->mclk_stutter_mode_threshold &&
 	    (memory_clock <= pi->mclk_stutter_mode_threshold) &&
-	    (pi->uvd_enabled == false) &&
+	    (!pi->uvd_enabled) &&
 	    (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) &&
 	    (adev->pm.dpm.new_active_crtc_count <= 2))
 		memory_level->StutterEnable = true;
@@ -3636,6 +3642,10 @@ static int ci_setup_default_dpm_tables(struct amdgpu_device *adev)
 
 	ci_setup_default_pcie_tables(adev);
 
+	/* save a copy of the default DPM table */
+	memcpy(&(pi->golden_dpm_table), &(pi->dpm_table),
+			sizeof(struct ci_dpm_table));
+
 	return 0;
 }
 
@@ -5754,10 +5764,18 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		chip_name = "bonaire";
+		if ((adev->pdev->revision == 0x80) ||
+		    (adev->pdev->revision == 0x81) ||
+		    (adev->pdev->device == 0x665f))
+			chip_name = "bonaire_k";
+		else
+			chip_name = "bonaire";
 		break;
 	case CHIP_HAWAII:
-		chip_name = "hawaii";
+		if (adev->pdev->revision == 0x80)
+			chip_name = "hawaii_k";
+		else
+			chip_name = "hawaii";
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
@@ -6221,6 +6239,9 @@ static int ci_dpm_sw_fini(void *handle)
 	ci_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
@@ -6401,6 +6422,186 @@ static int ci_dpm_set_powergating_state(void *handle,
 	return 0;
 }
 
+static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
+		enum pp_clock_type type, char *buf)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
+	struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
+	struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
+
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = ci_get_current_pcie_speed(adev);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
+		enum pp_clock_type type, uint32_t mask)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+
+	if (adev->pm.dpm.forced_level
+			!= AMDGPU_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!pi->sclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_MCLK:
+		if (!pi->mclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_PCIE:
+	{
+		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+		uint32_t level = 0;
+
+		while (tmp >>= 1)
+			level++;
+
+		if (!pi->pcie_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					level);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].sclk =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].mclk =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
@@ -6435,6 +6636,12 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
 	.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
 	.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
 	.get_fan_speed_percent = &ci_dpm_get_fan_speed_percent,
+	.print_clock_levels = ci_dpm_print_clock_levels,
+	.force_clock_level = ci_dpm_force_clock_level,
+	.get_sclk_od = ci_dpm_get_sclk_od,
+	.set_sclk_od = ci_dpm_set_sclk_od,
+	.get_mclk_od = ci_dpm_get_mclk_od,
+	.set_mclk_od = ci_dpm_set_mclk_od,
 };
 
 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index faccc30c93bf..91be2996ae7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -193,6 +193,7 @@ struct ci_pt_defaults {
 
 struct ci_power_info {
 	struct ci_dpm_table dpm_table;
+	struct ci_dpm_table golden_dpm_table;
 	u32 voltage_control;
 	u32 mvdd_control;
 	u32 vddci_control;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07bc795a4ca9..4efc901f658c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -879,7 +879,7 @@ static void cik_vga_set_state(struct amdgpu_device *adev, bool state)
 	uint32_t tmp;
 
 	tmp = RREG32(mmCONFIG_CNTL);
-	if (state == false)
+	if (!state)
 		tmp |= CONFIG_CNTL__VGA_DIS_MASK;
 	else
 		tmp &= ~CONFIG_CNTL__VGA_DIS_MASK;
@@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 cik_get_virtual_caps(struct amdgpu_device *adev)
+{
+	/* CIK does not support SR-IOV */
+	return 0;
+}
+
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 	{mmGRBM_STATUS, false},
 	{mmGB_ADDR_CONFIG, false},
@@ -1029,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -1152,10 +1158,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
 	WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
 }
 
-static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	struct kv_reset_save_regs kv_save = { 0 };
 	u32 i;
+	int r = -EINVAL;
 
 	dev_info(adev->dev, "GPU pci config reset\n");
 
@@ -1171,14 +1178,20 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			r = 0;
 			break;
+		}
 		udelay(1);
 	}
 
 	/* does asic init need to be run first??? */
 	if (adev->flags & AMD_IS_APU)
 		kv_restore_regs_for_reset(adev, &kv_save);
+
+	return r;
 }
 
 static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -1204,13 +1217,14 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
  */
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
 	cik_set_bios_scratch_engine_hung(adev, true);
 
-	cik_gpu_pci_config_reset(adev);
+	r = cik_gpu_pci_config_reset(adev);
 
 	cik_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -2007,9 +2021,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_xclk = &cik_get_xclk,
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle,
+	.get_virtual_caps = &cik_get_virtual_caps,
 };
 
 static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 518dca43b133..ee6466912497 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -66,6 +66,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
 
 u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
 
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+			release_firmware(adev->sdma.instance[i].fw);
+			adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /*
  * sDMA - System DMA
  * Starting with CIK, the GPU has new asynchronous
@@ -214,17 +224,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 				  unsigned vm_id, bool ctx_switch)
 {
 	u32 extra_bits = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 4)
-		next_rptr++;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
@@ -355,7 +354,7 @@ static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
 	u32 me_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		cik_sdma_gfx_stop(adev);
 		cik_sdma_rlc_stop(adev);
 	}
@@ -419,6 +418,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -446,7 +447,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
+
+	cik_sdma_enable(adev, true);
 
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -529,8 +535,8 @@ static int cik_sdma_start(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	/* unhalt the MEs */
-	cik_sdma_enable(adev, true);
+	/* halt the engine before programing */
+	cik_sdma_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = cik_sdma_gfx_resume(adev);
@@ -611,20 +617,19 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (CIK).
  * Returns 0 on success, error on failure.
  */
-static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -634,11 +639,12 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
-	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 	ib.ptr[1] = lower_32_bits(gpu_addr);
 	ib.ptr[2] = upper_32_bits(gpu_addr);
 	ib.ptr[3] = 1;
@@ -648,28 +654,25 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -998,6 +1001,7 @@ static int cik_sdma_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	cik_sdma_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 933e425a8154..2a11413ed54a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 	pi->mgcg_cgtt_local1 = 0x0;
 	pi->clock_slow_down_step = 25000;
 	pi->skip_clock_slow_down = 1;
-	pi->enable_nb_ps_policy = 0;
+	pi->enable_nb_ps_policy = false;
 	pi->caps_power_containment = true;
 	pi->caps_cac = true;
 	pi->didt_enabled = false;
@@ -2219,6 +2219,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 			}
 		}
 	} else { /*pi->caps_vce_pg*/
+		pi->vce_power_gated = gate;
 		cz_update_vce_dpm(adev);
 		cz_enable_vce_dpm(adev, !gate);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index c90408bc0fde..d4bf133908b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -307,11 +307,10 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
 	u32 tmp;
 
-	/* flip at hsync for async, default is vsync */
-	/* use UPDATE_IMMEDIATE_EN instead for async? */
+	/* flip immediate for async, default is vsync */
 	tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
 	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
-			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+			    GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
 	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
 	/* update the scanout addresses */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 300ff4aab0fd..4fdfab1e9200 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -526,36 +526,16 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 		crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
 					     CRTC_CONTROL, CRTC_MASTER_EN);
 		if (crtc_enabled) {
-#if 0
-			u32 frame_count;
-			int j;
-
+#if 1
 			save->crtc_enabled[i] = true;
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-				amdgpu_display_vblank_wait(adev, i);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+				/*it is correct only for RGB ; black is 0*/
+				WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
 				tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
 				WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			}
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
 			}
+			mdelay(20);
 #else
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
@@ -575,55 +555,22 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
 				      struct amdgpu_mode_mc_save *save)
 {
-	u32 tmp, frame_count;
-	int i, j;
+	u32 tmp;
+	int i;
 
 	/* update crtc base addresses */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
 		       upper_32_bits(adev->mc.vram_start));
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-		       upper_32_bits(adev->mc.vram_start));
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
 		       (u32)adev->mc.vram_start);
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-		       (u32)adev->mc.vram_start);
 
 		if (save->crtc_enabled[i]) {
-			tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 3) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 3);
-				WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-			}
-			for (j = 0; j < adev->usec_timeout; j++) {
-				tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-				if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0)
-					break;
-				udelay(1);
-			}
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
 		}
+		mdelay(20);
 	}
 
 	WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
index 245cabf06575..ed03b75175d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
@@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle)
 
 static int fiji_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index b336c918d6a7..b3e19ba4c57f 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -173,7 +173,7 @@ static int fiji_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
 {
 	if (!fiji_is_smc_ram_running(adev))
 	{
-		return -EINVAL;;
+		return -EINVAL;
 	}
 
 	if (wait_smu_response(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7f18a53ab53a..d869d058ef24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -991,6 +991,22 @@ out:
 	return err;
 }
 
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+}
+
 /**
  * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
  *
@@ -1567,9 +1583,15 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
  * registers are instanced per SE or SH.  0xffffffff means
  * broadcast to all SEs or SHs (CIK).
  */
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
 		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
@@ -1643,13 +1665,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v7_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -1730,7 +1752,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2034,17 +2056,6 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -2073,22 +2084,9 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	control |= INDIRECT_BUFFER_VALID;
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
-	control |= ib->length_dw | (vm_id << 24);
-
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -2107,26 +2105,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
  * Provides a basic gfx ring test to verify that IBs are working.
  * Returns 0 on success, error on failure.
  */
-static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
+static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i;
-	int r;
+	long r;
 
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err1;
 	}
 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
@@ -2138,21 +2135,19 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err2;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err2;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err2;
+	}
+	tmp = RREG32(scratch);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
@@ -2160,7 +2155,6 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 
 err2:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err1:
@@ -3205,7 +3199,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		}
 	}
 	adev->gfx.rlc.cs_data = ci_cs_data;
-	adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
 
 	src_ptr = adev->gfx.rlc.reg_list;
 	dws = adev->gfx.rlc.reg_list_size;
@@ -3363,7 +3358,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3371,7 +3366,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3418,7 +3413,7 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
 	return orig;
 }
 
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp, i, mask;
 
@@ -3440,7 +3435,7 @@ void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp;
 
@@ -3455,7 +3450,7 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
  *
  * Halt the RLC ME (MicroEngine) (CIK).
  */
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
 {
 	WREG32(mmRLC_CNTL, 0);
 
@@ -3531,7 +3526,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
 	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 	WREG32(mmRLC_LB_PARAMS, 0x00600408);
 	WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3571,7 +3566,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3622,7 +3617,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3673,7 +3668,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3851,6 +3846,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 	}
 }
 
+static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 data;
+
+	if (!bitmap)
+		return;
+
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
 static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -4107,7 +4116,7 @@ static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
  * Fetches a GPU clock counter snapshot (SI).
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -4167,12 +4176,24 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v7_0_select_se_sh,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
+	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
+	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
+};
+
 static int gfx_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
+	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
 	gfx_v7_0_set_irq_funcs(adev);
 	gfx_v7_0_set_gds_init(adev);
@@ -4489,6 +4510,7 @@ static int gfx_v7_0_sw_fini(void *handle)
 	gfx_v7_0_cp_compute_fini(adev);
 	gfx_v7_0_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
+	gfx_v7_0_free_microcode(adev);
 
 	return 0;
 }
@@ -4816,7 +4838,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
 	case 2:
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			ring = &adev->gfx.compute_ring[i];
-			if ((ring->me == me_id) & (ring->pipe == pipe_id))
+			if ((ring->me == me_id) && (ring->pipe == pipe_id))
 				amdgpu_fence_process(ring);
 		}
 		break;
@@ -5015,16 +5037,22 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v7_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -5040,7 +5068,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index e747aa935c88..94e3ea147c26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gfx_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev);
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index f19bab68fd83..bff8668e9e6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -28,6 +28,7 @@
 #include "vid.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
 #include "clearstate_vi.h"
 
 #include "gmc/gmc_8_2_d.h"
@@ -47,6 +48,8 @@
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
+#include "smu/smu_7_1_3_d.h"
+
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
 
@@ -282,6 +285,7 @@ static const u32 golden_settings_polaris11_a11[] =
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris11_golden_common_all[] =
@@ -297,7 +301,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -311,6 +316,7 @@ static const u32 golden_settings_polaris10_a11[] =
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris10_golden_common_all[] =
@@ -692,6 +698,11 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		amdgpu_program_register_sequence(adev,
 						 polaris10_golden_common_all,
 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
+		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
+		if (adev->pdev->revision == 0xc7) {
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
+		}
 		break;
 	case CHIP_CARRIZO:
 		amdgpu_program_register_sequence(adev,
@@ -776,26 +787,25 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
-static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
+static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i;
-	int r;
+	long r;
 
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err1;
 	}
 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
@@ -807,28 +817,25 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err2;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
 		goto err2;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err2;
+	}
+	tmp = RREG32(scratch);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
 		r = -EINVAL;
 	}
 err2:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err1:
@@ -836,6 +843,26 @@ err1:
 	return r;
 }
 
+
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	if ((adev->asic_type != CHIP_STONEY) &&
+	    (adev->asic_type != CHIP_TOPAZ))
+		release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -1129,6 +1156,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
 	buffer[count++] = cpu_to_le32(0);
 }
 
+static void cz_init_cp_jump_table(struct amdgpu_device *adev)
+{
+	const __le32 *fw_data;
+	volatile u32 *dst_ptr;
+	int me, i, max_me = 4;
+	u32 bo_offset = 0;
+	u32 table_offset, table_size;
+
+	if (adev->asic_type == CHIP_CARRIZO)
+		max_me = 5;
+
+	/* write the cp table buffer */
+	dst_ptr = adev->gfx.rlc.cp_table_ptr;
+	for (me = 0; me < max_me; me++) {
+		if (me == 0) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.ce_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 1) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.pfp_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 2) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.me_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 3) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else  if (me == 4) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec2_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		}
+
+		for (i = 0; i < table_size; i ++) {
+			dst_ptr[bo_offset + i] =
+				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+		}
+
+		bo_offset += table_size;
+	}
+}
+
 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 {
 	int r;
@@ -1144,6 +1236,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
 		adev->gfx.rlc.clear_state_obj = NULL;
 	}
+
+	/* jump table block */
+	if (adev->gfx.rlc.cp_table_obj) {
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
+		adev->gfx.rlc.cp_table_obj = NULL;
+	}
 }
 
 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
@@ -1200,6 +1304,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 	}
 
+	if ((adev->asic_type == CHIP_CARRIZO) ||
+	    (adev->asic_type == CHIP_STONEY)) {
+		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+		if (adev->gfx.rlc.cp_table_obj == NULL) {
+			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
+					     AMDGPU_GEM_DOMAIN_VRAM,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     NULL, NULL,
+					     &adev->gfx.rlc.cp_table_obj);
+			if (r) {
+				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
+				return r;
+			}
+		}
+
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0)) {
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
+				  &adev->gfx.rlc.cp_table_gpu_addr);
+		if (r) {
+			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
+		if (r) {
+			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
+			return r;
+		}
+
+		cz_init_cp_jump_table(adev);
+
+		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+	}
+
 	return 0;
 }
 
@@ -1581,7 +1725,6 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 		RREG32(sec_ded_counter_registers[i]);
 
 fail:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 
@@ -1983,7 +2126,7 @@ static int gfx_v8_0_sw_fini(void *handle)
 
 	gfx_v8_0_rlc_fini(adev);
 
-	kfree(adev->gfx.rlc.register_list_format);
+	gfx_v8_0_free_microcode(adev);
 
 	return 0;
 }
@@ -3308,9 +3451,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
@@ -3360,13 +3509,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v8_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -3470,7 +3619,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3493,7 +3642,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3501,7 +3650,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3662,13 +3811,13 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
 	WREG32(mmRLC_SRM_CNTL, data);
 }
 
-static void polaris11_init_power_gating(struct amdgpu_device *adev)
+static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
 {
 	uint32_t data;
 
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
-			AMD_PG_SUPPORT_GFX_SMG |
-			AMD_PG_SUPPORT_GFX_DMG)) {
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG)) {
 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
@@ -3693,6 +3842,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
 	}
 }
 
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+						  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+	else
+		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 {
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
@@ -3705,8 +3901,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
 
-		if (adev->asic_type == CHIP_POLARIS11)
-			polaris11_init_power_gating(adev);
+		if ((adev->asic_type == CHIP_CARRIZO) ||
+		    (adev->asic_type == CHIP_STONEY)) {
+			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+			gfx_v8_0_init_power_gating(adev);
+			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+				cz_enable_sck_slow_down_on_power_up(adev, true);
+				cz_enable_sck_slow_down_on_power_down(adev, true);
+			} else {
+				cz_enable_sck_slow_down_on_power_up(adev, false);
+				cz_enable_sck_slow_down_on_power_down(adev, false);
+			}
+			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+				cz_enable_cp_power_gating(adev, true);
+			else
+				cz_enable_cp_power_gating(adev, false);
+		} else if (adev->asic_type == CHIP_POLARIS11) {
+			gfx_v8_0_init_power_gating(adev);
+		}
 	}
 }
 
@@ -3974,11 +4187,15 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 		amdgpu_ring_write(ring, 0x3a00161a);
 		amdgpu_ring_write(ring, 0x0000002e);
 		break;
-	case CHIP_TOPAZ:
 	case CHIP_CARRIZO:
 		amdgpu_ring_write(ring, 0x00000002);
 		amdgpu_ring_write(ring, 0x00000000);
 		break;
+	case CHIP_TOPAZ:
+		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
+				0x00000000 : 0x00000002);
+		amdgpu_ring_write(ring, 0x00000000);
+		break;
 	case CHIP_STONEY:
 		amdgpu_ring_write(ring, 0x00000000);
 		amdgpu_ring_write(ring, 0x00000000);
@@ -4941,7 +5158,7 @@ static int gfx_v8_0_soft_reset(void *handle)
  * Fetches a GPU clock counter snapshot.
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -5001,12 +5218,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v8_0_select_se_sh,
+};
+
 static int gfx_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
 	gfx_v8_0_set_gds_init(adev);
@@ -5039,51 +5262,43 @@ static int gfx_v8_0_late_init(void *handle)
 	return 0;
 }
 
-static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t data, temp;
 
-	/* Send msg to SMU via Powerplay */
-	amdgpu_set_powergating_state(adev,
-			AMD_IP_BLOCK_TYPE_SMC,
-			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+	if (adev->asic_type == CHIP_POLARIS11)
+		/* Send msg to SMU via Powerplay */
+		amdgpu_set_powergating_state(adev,
+					     AMD_IP_BLOCK_TYPE_SMC,
+					     enable ?
+					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
 
-	if (enable) {
-		/* Enable static MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable static MGPG */
+	if (enable)
 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
-static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+							bool enable)
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable dynamic MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable dynamic MGPG */
+	if (enable)
 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
@@ -5091,19 +5306,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable quick PG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data |= 0x100000;
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable quick PG */
+	if (enable)
+		data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data &= ~0x100000;
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+
+	/* Read any GFX register to wake up GFX. */
+	if (!enable)
+		data = RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+		cz_enable_gfx_cg_power_gating(adev, true);
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+			cz_enable_gfx_pipeline_power_gating(adev, true);
+	} else {
+		cz_enable_gfx_cg_power_gating(adev, false);
+		cz_enable_gfx_pipeline_power_gating(adev, false);
 	}
 }
 
@@ -5111,21 +5370,42 @@ static int gfx_v8_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
 
 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
 		return 0;
 
 	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+			cz_update_gfx_cg_power_gating(adev, enable);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+		break;
 	case CHIP_POLARIS11:
-		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
-			polaris11_enable_gfx_static_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
-		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
-			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
 		else
-			polaris11_enable_gfx_quick_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
+		else
+			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
 		break;
 	default:
 		break;
@@ -5139,7 +5419,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
 {
 	uint32_t data;
 
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5527,6 +5807,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 	}
 
+	gfx_v8_0_wait_for_rlc_serdes(adev);
+
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 }
 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -5652,17 +5934,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -5691,23 +5962,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	control |= INDIRECT_BUFFER_VALID;
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
-	control |= ib->length_dw | (vm_id << 24);
-
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -6160,9 +6417,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-	case CHIP_STONEY:
 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 		break;
+	case CHIP_STONEY:
 	case CHIP_CARRIZO:
 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
 		break;
@@ -6200,6 +6457,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 data;
+
+	if (!bitmap)
+		return;
+
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -6220,16 +6491,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v8_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -6245,7 +6522,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 16a49f53a2fa..bc82c794312c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -26,7 +26,6 @@
 
 extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
 
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 1feb6439cb0b..d24a82bd0c7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -39,6 +39,7 @@
 
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v7_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
@@ -73,39 +74,15 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc7_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & 0x1F00;
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v7_0_wait_for_idle((void *)adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -120,8 +97,8 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -311,7 +288,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v7_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -331,7 +308,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v7_0_mc_resume(adev, &save);
@@ -1137,7 +1114,7 @@ static int gmc_v7_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v7_0_mc_stop(adev, &save);
-		if (gmc_v7_0_wait_for_idle(adev)) {
+		if (gmc_v7_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
index 36fcbbc46ada..0b386b5d2f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9945d5bbf1fe..717359d3ba8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -41,6 +41,7 @@
 
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v8_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -147,44 +148,15 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__VMC_BUSY_MASK |
-					       SRBM_STATUS__MCB_BUSY_MASK |
-					       SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-					       SRBM_STATUS__MCC_BUSY_MASK |
-					       SRBM_STATUS__MCD_BUSY_MASK |
-					       SRBM_STATUS__VMC1_BUSY_MASK);
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v8_0_wait_for_idle(adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -199,8 +171,8 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -393,7 +365,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v8_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -413,7 +385,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v8_0_mc_resume(adev, &save);
@@ -1140,7 +1112,7 @@ static int gmc_v8_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v8_0_mc_stop(adev, &save);
-		if (gmc_v8_0_wait_for_idle(adev)) {
+		if (gmc_v8_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
index 973436086b38..fc5001a8119d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v8_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
index 460bc8ad37e6..2f078ad6095c 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
-#include "iceland_smumgr.h"
+#include "iceland_smum.h"
 
 MODULE_FIRMWARE("amdgpu/topaz_smc.bin");
 
@@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle)
 
 static int iceland_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 52ee08193295..211839913728 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -25,7 +25,7 @@
 #include "drmP.h"
 #include "amdgpu.h"
 #include "ppsmc.h"
-#include "iceland_smumgr.h"
+#include "iceland_smum.h"
 #include "smu_ucode_xfer_vi.h"
 #include "amdgpu_ucode.h"
 
@@ -211,7 +211,7 @@ static int iceland_send_msg_to_smc_without_waiting(struct amdgpu_device *adev,
 						   PPSMC_Msg msg)
 {
 	if (!iceland_is_smc_ram_running(adev))
-		return -EINVAL;;
+		return -EINVAL;
 
 	if (wait_smu_response(adev)) {
 		DRM_ERROR("Failed to send previous message\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smumgr.h b/drivers/gpu/drm/amd/amdgpu/iceland_smum.h
index 1e0769e110fa..5983e3150cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smumgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smum.h
@@ -21,8 +21,8 @@
  *
  */
 
-#ifndef ICELAND_SMUMGR_H
-#define ICELAND_SMUMGR_H
+#ifndef ICELAND_SMUM_H
+#define ICELAND_SMUM_H
 
 #include "ppsmc.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index a789a863d677..a845e883f5fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -191,6 +191,7 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev,
 	vid_mapping_table->num_entries = i;
 }
 
+#if 0
 static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] =
 {
 	{  0,       4,        1    },
@@ -289,6 +290,7 @@ static const struct kv_lcac_config_reg cpl_cac_config_reg[] =
 {
 	{ 0xc0400d80, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
 };
+#endif
 
 static const struct kv_pt_config_reg didt_config_kv[] =
 {
@@ -507,19 +509,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
 	    pi->caps_db_ramping ||
 	    pi->caps_td_ramping ||
 	    pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = kv_program_pt_config_registers(adev, didt_config_kv);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		kv_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/ppsmc.h b/drivers/gpu/drm/amd/amdgpu/ppsmc.h
index 7837f2ecc357..8463245f424f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ppsmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/ppsmc.h
@@ -90,7 +90,9 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_StartFanControl               ((uint8_t)0x5B)
 #define PPSMC_StopFanControl                ((uint8_t)0x5C)
 #define PPSMC_MSG_NoDisplay                 ((uint8_t)0x5D)
+#define PPSMC_NoDisplay                     ((uint8_t)0x5D)
 #define PPSMC_MSG_HasDisplay                ((uint8_t)0x5E)
+#define PPSMC_HasDisplay                    ((uint8_t)0x5E)
 #define PPSMC_MSG_UVDPowerOFF               ((uint8_t)0x60)
 #define PPSMC_MSG_UVDPowerON                ((uint8_t)0x61)
 #define PPSMC_MSG_EnableULV                 ((uint8_t)0x62)
@@ -108,6 +110,7 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_MSG_DisableDTE                ((uint8_t)0x88)
 #define PPSMC_MSG_ThrottleOVRDSCLKDS        ((uint8_t)0x96)
 #define PPSMC_MSG_CancelThrottleOVRDSCLKDS  ((uint8_t)0x97)
+#define PPSMC_MSG_EnableACDCGPIOInterrupt   ((uint16_t) 0x149)
 
 /* CI/KV/KB */
 #define PPSMC_MSG_UVDDPM_SetEnabledMask       ((uint16_t) 0x12D)
@@ -161,6 +164,7 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_MSG_MASTER_DeepSleep_OFF        ((uint16_t) 0x190)
 #define PPSMC_MSG_Remove_DC_Clamp             ((uint16_t) 0x191)
 #define PPSMC_MSG_SetFanPwmMax                ((uint16_t) 0x19A)
+#define PPSMC_MSG_SetFanRpmMax                ((uint16_t) 0x205)
 
 #define PPSMC_MSG_ENABLE_THERMAL_DPM          ((uint16_t) 0x19C)
 #define PPSMC_MSG_DISABLE_THERMAL_DPM         ((uint16_t) 0x19D)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f4c3130d3fdb..1351c7e834a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -105,6 +105,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v2_4_init_microcode - load ucode images from disk
  *
@@ -246,19 +255,6 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -397,7 +393,7 @@ static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
 	u32 f32_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		sdma_v2_4_gfx_stop(adev);
 		sdma_v2_4_rlc_stop(adev);
 	}
@@ -461,6 +457,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -489,7 +487,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
 
+	sdma_v2_4_enable(adev, true);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -565,23 +567,25 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (!adev->firmware.smu_load) {
-		r = sdma_v2_4_load_microcode(adev);
-		if (r)
-			return r;
-	} else {
-		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-						AMDGPU_UCODE_ID_SDMA0);
-		if (r)
-			return -EINVAL;
-		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-						AMDGPU_UCODE_ID_SDMA1);
-		if (r)
-			return -EINVAL;
+	if (!adev->pp_enabled) {
+		if (!adev->firmware.smu_load) {
+			r = sdma_v2_4_load_microcode(adev);
+			if (r)
+				return r;
+		} else {
+			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+							AMDGPU_UCODE_ID_SDMA0);
+			if (r)
+				return -EINVAL;
+			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+							AMDGPU_UCODE_ID_SDMA1);
+			if (r)
+				return -EINVAL;
+		}
 	}
 
-	/* unhalt the MEs */
-	sdma_v2_4_enable(adev, true);
+	/* halt the engine before programing */
+	sdma_v2_4_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v2_4_gfx_resume(adev);
@@ -664,20 +668,19 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (VI).
  * Returns 0 on success, error on failure.
  */
-static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
+static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -687,7 +690,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
@@ -706,28 +709,25 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -1012,6 +1012,7 @@ static int sdma_v2_4_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v2_4_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 31d99b0010f7..653ce5ed55ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -236,6 +236,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v3_0_init_microcode - load ucode images from disk
  *
@@ -406,18 +415,6 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -607,7 +604,7 @@ static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
 	u32 f32_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		sdma_v3_0_gfx_stop(adev);
 		sdma_v3_0_rlc_stop(adev);
 	}
@@ -672,6 +669,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -711,7 +710,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
+
+	/* unhalt the MEs */
+	sdma_v3_0_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v3_0_ctx_switch_enable(adev, true);
 
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -804,10 +811,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 		}
 	}
 
-	/* unhalt the MEs */
-	sdma_v3_0_enable(adev, true);
-	/* enable sdma ring preemption */
-	sdma_v3_0_ctx_switch_enable(adev, true);
+	/* disble sdma engine before programing it */
+	sdma_v3_0_ctx_switch_enable(adev, false);
+	sdma_v3_0_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v3_0_gfx_resume(adev);
@@ -890,20 +896,19 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (VI).
  * Returns 0 on success, error on failure.
  */
-static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
+static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -913,7 +918,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
@@ -932,27 +937,24 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -1247,6 +1249,7 @@ static int sdma_v3_0_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v3_0_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index b7615cefcac4..f06f6f4dc3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle)
 
 static int tonga_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 083893dd68c0..940de1836f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -173,7 +173,7 @@ static int tonga_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
 {
 	if (!tonga_is_smc_ram_running(adev))
 	{
-		return -EINVAL;;
+		return -EINVAL;
 	}
 
 	if (wait_smu_response(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index f07551476a70..132e613ed674 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -34,6 +34,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "bif/bif_4_1_d.h"
+
 static void uvd_v4_2_mc_resume(struct amdgpu_device *adev);
 static void uvd_v4_2_init_cg(struct amdgpu_device *adev);
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
@@ -439,6 +441,32 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v4_2_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v4_2_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -499,49 +527,6 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
 }
 
 /**
- * uvd_v4_2_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	struct fence *fence = NULL;
-	int r;
-
-	r = amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to raise UVD clocks (%d).\n", r);
-		return r;
-	}
-
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
-
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
-
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-	return r;
-}
-
-/**
  * uvd_v4_2_mc_resume - memory controller programming
  *
  * @adev: amdgpu_device pointer
@@ -763,10 +748,14 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v4_2_ring_emit_ib,
 	.emit_fence = uvd_v4_2_ring_emit_fence,
+	.emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v4_2_ring_test_ring,
-	.test_ib = uvd_v4_2_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e0a76a883d46..101de136ba63 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -31,6 +31,7 @@
 #include "uvd/uvd_5_0_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
+#include "bif/bif_5_0_d.h"
 #include "vi.h"
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -489,6 +490,32 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v5_0_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v5_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -550,49 +577,6 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
-/**
- * uvd_v5_0_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	struct fence *fence = NULL;
-	int r;
-
-	r = amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to raise UVD clocks (%d).\n", r);
-		return r;
-	}
-
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
-
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
-
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-	return r;
-}
-
 static bool uvd_v5_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -815,10 +799,14 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v5_0_ring_emit_ib,
 	.emit_fence = uvd_v5_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v5_0_ring_test_ring,
-	.test_ib = uvd_v5_0_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9929d665c01..7f21102bfb99 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -33,6 +33,8 @@
 #include "oss/oss_2_0_sh_mask.h"
 #include "smu/smu_7_1_3_d.h"
 #include "smu/smu_7_1_3_sh_mask.h"
+#include "bif/bif_5_1_d.h"
+#include "gmc/gmc_8_1_d.h"
 #include "vi.h"
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -385,8 +387,8 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	uint32_t mp_swap_cntl;
 	int i, j, r;
 
-	/*disable DPG */
-	WREG32_P(mmUVD_POWER_STATUS, 0, ~(1 << 2));
+	/* disable DPG */
+	WREG32_P(mmUVD_POWER_STATUS, 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
 
 	/* disable byte swapping */
 	lmi_swap_cntl = 0;
@@ -405,17 +407,21 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	}
 
 	/* disable interupt */
-	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+	WREG32_P(mmUVD_MASTINT_EN, 0, ~UVD_MASTINT_EN__VCPU_EN_MASK);
 
 	/* stall UMC and register bus before resetting VCPU */
-	WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 	mdelay(1);
 
 	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+	WREG32(mmUVD_SOFT_RESET,
+		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
 		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
 	mdelay(5);
 
@@ -424,8 +430,13 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* initialize UVD memory controller */
-	WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-			     (1 << 21) | (1 << 9) | (1 << 20));
+	WREG32(mmUVD_LMI_CTRL,
+		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__DISABLE_ON_FWV_FAIL_MASK);
 
 #ifdef __BIG_ENDIAN
 	/* swap (8 in 32) RB and IB */
@@ -447,10 +458,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* enable VCPU clock */
-	WREG32(mmUVD_VCPU_CNTL,  1 << 9);
+	WREG32(mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
 
 	/* enable UMC */
-	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	/* boot up the VCPU */
 	WREG32(mmUVD_SOFT_RESET, 0);
@@ -484,10 +495,12 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 		return r;
 	}
 	/* enable master interrupt */
-	WREG32_P(mmUVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+	WREG32_P(mmUVD_MASTINT_EN,
+		(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+		~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
 
 	/* clear the bit 4 of UVD_STATUS */
-	WREG32_P(mmUVD_STATUS, 0, ~(2 << 1));
+	WREG32_P(mmUVD_STATUS, 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
 
 	rb_bufsz = order_base_2(ring->ring_size);
 	tmp = 0;
@@ -581,6 +594,32 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v6_0_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v6_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -634,6 +673,9 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
 				  unsigned vm_id, bool ctx_switch)
 {
+	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
+	amdgpu_ring_write(ring, vm_id);
+
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH, 0));
@@ -642,39 +684,55 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
-/**
- * uvd_v6_0_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
+static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					 unsigned vm_id, uint64_t pd_addr)
 {
-	struct fence *fence = NULL;
-	int r;
+	uint32_t reg;
 
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
+	if (vm_id < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id;
+	else
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8;
 
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, pd_addr >> 12);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0x8);
 
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	return r;
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0x8);
+
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0xC);
+}
+
+static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH9, 0));
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0xE);
 }
 
 static bool uvd_v6_0_is_idle(void *handle)
@@ -847,7 +905,8 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	static int curstate = -1;
 
-	if (adev->asic_type == CHIP_FIJI)
+	if (adev->asic_type == CHIP_FIJI ||
+			adev->asic_type == CHIP_POLARIS10)
 		uvd_v6_set_bypass_mode(adev, enable);
 
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
@@ -912,22 +971,51 @@ const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
 	.set_powergating_state = uvd_v6_0_set_powergating_state,
 };
 
-static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
 	.get_rptr = uvd_v6_0_ring_get_rptr,
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v6_0_ring_emit_ib,
 	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
+	.test_ring = uvd_v6_0_ring_test_ring,
+	.test_ib = amdgpu_uvd_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
+	.get_rptr = uvd_v6_0_ring_get_rptr,
+	.get_wptr = uvd_v6_0_ring_get_wptr,
+	.set_wptr = uvd_v6_0_ring_set_wptr,
+	.parse_cs = NULL,
+	.emit_ib = uvd_v6_0_ring_emit_ib,
+	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
+	.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v6_0_ring_test_ring,
-	.test_ib = uvd_v6_0_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.ring.funcs = &uvd_v6_0_ring_funcs;
+	if (adev->asic_type >= CHIP_POLARIS10) {
+		adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs;
+		DRM_INFO("UVD is enabled in VM mode\n");
+	} else {
+		adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs;
+		DRM_INFO("UVD is enabled in physical mode\n");
+	}
 }
 
 static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 45d92aceb485..80a37a602181 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -594,6 +594,8 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vce_ring_begin_use,
+	.end_use = amdgpu_vce_ring_end_use,
 };
 
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 30e8099e94c5..c271abffd8dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -43,6 +43,7 @@
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
 
 #define VCE_V3_0_FW_SIZE	(384 * 1024)
 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
@@ -51,6 +52,7 @@
 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vce_v3_0_wait_for_idle(void *handle);
 
 /**
  * vce_v3_0_ring_get_rptr - get read pointer
@@ -205,6 +207,32 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 	vce_v3_0_override_vce_clock_gating(adev, false);
 }
 
+static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
+{
+	int i, j;
+
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			uint32_t status = RREG32(mmVCE_STATUS);
+
+			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+				return 0;
+			mdelay(10);
+		}
+
+		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
+		WREG32_P(mmVCE_SOFT_RESET,
+			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+		mdelay(10);
+		WREG32_P(mmVCE_SOFT_RESET, 0,
+			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+		mdelay(10);
+	}
+
+	return -ETIMEDOUT;
+}
+
 /**
  * vce_v3_0_start - start VCE block
  *
@@ -215,11 +243,24 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 static int vce_v3_0_start(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	int idx, i, j, r;
+	int idx, r;
+
+	ring = &adev->vce.ring[0];
+	WREG32(mmVCE_RB_RPTR, ring->wptr);
+	WREG32(mmVCE_RB_WPTR, ring->wptr);
+	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+	ring = &adev->vce.ring[1];
+	WREG32(mmVCE_RB_RPTR2, ring->wptr);
+	WREG32(mmVCE_RB_WPTR2, ring->wptr);
+	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (idx = 0; idx < 2; ++idx) {
-
 		if (adev->vce.harvest_config & (1 << idx))
 			continue;
 
@@ -233,48 +274,24 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 
 		vce_v3_0_mc_resume(adev, idx);
 
-		/* set BUSY flag */
-		WREG32_P(mmVCE_STATUS, 1, ~1);
+		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
+		         ~VCE_STATUS__JOB_BUSY_MASK);
+
 		if (adev->asic_type >= CHIP_STONEY)
 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
 		else
 			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
 				~VCE_VCPU_CNTL__CLK_EN_MASK);
 
-		WREG32_P(mmVCE_SOFT_RESET,
-			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
-			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-
-		mdelay(100);
-
 		WREG32_P(mmVCE_SOFT_RESET, 0,
 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 
-		for (i = 0; i < 10; ++i) {
-			uint32_t status;
-			for (j = 0; j < 100; ++j) {
-				status = RREG32(mmVCE_STATUS);
-				if (status & 2)
-					break;
-				mdelay(10);
-			}
-			r = 0;
-			if (status & 2)
-				break;
-
-			DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
-			WREG32_P(mmVCE_SOFT_RESET,
-				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-			mdelay(10);
-			WREG32_P(mmVCE_SOFT_RESET, 0,
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-			mdelay(10);
-			r = -1;
-		}
+		mdelay(100);
+
+		r = vce_v3_0_firmware_loaded(adev);
 
 		/* clear BUSY flag */
-		WREG32_P(mmVCE_STATUS, 0, ~1);
+		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
 
 		/* Set Clock-Gating off */
 		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
@@ -290,19 +307,46 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
-	ring = &adev->vce.ring[0];
-	WREG32(mmVCE_RB_RPTR, ring->wptr);
-	WREG32(mmVCE_RB_WPTR, ring->wptr);
-	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+	return 0;
+}
 
-	ring = &adev->vce.ring[1];
-	WREG32(mmVCE_RB_RPTR2, ring->wptr);
-	WREG32(mmVCE_RB_WPTR2, ring->wptr);
-	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+static int vce_v3_0_stop(struct amdgpu_device *adev)
+{
+	int idx;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (idx = 0; idx < 2; ++idx) {
+		if (adev->vce.harvest_config & (1 << idx))
+			continue;
+
+		if (idx == 0)
+			WREG32_P(mmGRBM_GFX_INDEX, 0,
+				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+		else
+			WREG32_P(mmGRBM_GFX_INDEX,
+				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
+				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+
+		if (adev->asic_type >= CHIP_STONEY)
+			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
+		else
+			WREG32_P(mmVCE_VCPU_CNTL, 0,
+				~VCE_VCPU_CNTL__CLK_EN_MASK);
+		/* hold on ECPU */
+		WREG32_P(mmVCE_SOFT_RESET,
+			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+
+		/* clear BUSY flag */
+		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
+
+		/* Set Clock-Gating off */
+		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
+			vce_v3_0_set_vce_sw_clock_gating(adev, false);
+	}
+
+	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	return 0;
 }
@@ -441,7 +485,14 @@ static int vce_v3_0_hw_init(void *handle)
 
 static int vce_v3_0_hw_fini(void *handle)
 {
-	return 0;
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = vce_v3_0_wait_for_idle(handle);
+	if (r)
+		return r;
+
+	return vce_v3_0_stop(adev);
 }
 
 static int vce_v3_0_suspend(void *handle)
@@ -604,6 +655,18 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
+{
+	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
+
+	if (enable)
+		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+	else
+		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+
+	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
+}
+
 static int vce_v3_0_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
@@ -611,6 +674,9 @@ static int vce_v3_0_set_clockgating_state(void *handle,
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	int i;
 
+	if (adev->asic_type == CHIP_POLARIS10)
+		vce_v3_set_bypass_mode(adev, enable);
+
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 		return 0;
 
@@ -701,6 +767,8 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vce_ring_begin_use,
+	.end_use = amdgpu_vce_ring_end_use,
 };
 
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 2c88d0b66cf3..03a31c53aec3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -203,6 +203,29 @@ static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
 }
 
+static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	r = RREG32(mmGC_CAC_IND_DATA);
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+	return r;
+}
+
+static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	WREG32(mmGC_CAC_IND_DATA, (v));
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+}
+
+
 static const u32 tonga_mgcg_cgcg_init[] =
 {
 	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
@@ -421,6 +444,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 vi_get_virtual_caps(struct amdgpu_device *adev)
+{
+	u32 caps = 0;
+	u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
+		caps |= AMDGPU_VIRT_CAPS_SRIOV_EN;
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
+		caps |= AMDGPU_VIRT_CAPS_IS_VF;
+
+	return caps;
+}
+
 static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
 	{mmGB_MACROTILE_MODE7, true},
 };
@@ -519,12 +556,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -583,7 +620,7 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	u32 i;
 
@@ -598,11 +635,14 @@ static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
-			break;
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			return 0;
+		}
 		udelay(1);
 	}
-
+	return -EINVAL;
 }
 
 static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -628,13 +668,15 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
  */
 static int vi_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
+
 	vi_set_bios_scratch_engine_hung(adev, true);
 
-	vi_gpu_pci_config_reset(adev);
+	r = vi_gpu_pci_config_reset(adev);
 
 	vi_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -1118,9 +1160,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_xclk = &vi_get_xclk,
 	.set_uvd_clocks = &vi_set_uvd_clocks,
 	.set_vce_clocks = &vi_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle,
+	.get_virtual_caps = &vi_get_virtual_caps,
 };
 
 static int vi_common_early_init(void *handle)
@@ -1141,6 +1181,8 @@ static int vi_common_early_init(void *handle)
 	adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg;
 	adev->didt_rreg = &vi_didt_rreg;
 	adev->didt_wreg = &vi_didt_wreg;
+	adev->gc_cac_rreg = &vi_gc_cac_rreg;
+	adev->gc_cac_wreg = &vi_gc_cac_wreg;
 
 	adev->asic_funcs = &vi_asic_funcs;
 
@@ -1214,12 +1256,18 @@ static int vi_common_early_init(void *handle)
 		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
 			AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_RLC_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
 			AMD_CG_SUPPORT_BIF_LS |
 			AMD_CG_SUPPORT_HDP_MGCG |
 			AMD_CG_SUPPORT_HDP_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ac005796b71c..4f3849ac8c07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -63,13 +63,12 @@ static struct kfd_process *create_process(const struct task_struct *thread);
 void kfd_process_create_wq(void)
 {
 	if (!kfd_process_wq)
-		kfd_process_wq = create_workqueue("kfd_process_wq");
+		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 }
 
 void kfd_process_destroy_wq(void)
 {
 	if (kfd_process_wq) {
-		flush_workqueue(kfd_process_wq);
 		destroy_workqueue(kfd_process_wq);
 		kfd_process_wq = NULL;
 	}
@@ -242,13 +241,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should reset all wavefronts */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+	 * if we should delete debug managers and reset all wavefronts
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if ((pdd->dev->dbgmgr) &&
+				(pdd->dev->dbgmgr->pasid == p->pasid))
+			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
+
 		if (pdd->reset_wavefronts) {
 			pr_warn("amdkfd: Resetting all wave fronts\n");
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 			pdd->reset_wavefronts = false;
 		}
+	}
 
 	mutex_unlock(&p->mutex);
 
@@ -324,6 +329,7 @@ err_process_pqm_init:
 	synchronize_rcu();
 	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
 err_mmu_notifier:
+	mutex_destroy(&process->mutex);
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
 	kfree(process->queues);
@@ -404,42 +410,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 
 	idx = srcu_read_lock(&kfd_processes_srcu);
 
+	/*
+	 * Look for the process that matches the pasid. If there is no such
+	 * process, we either released it in amdkfd's own notifier, or there
+	 * is a bug. Unfortunately, there is no way to tell...
+	 */
 	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
-		if (p->pasid == pasid)
-			break;
+		if (p->pasid == pasid) {
 
-	srcu_read_unlock(&kfd_processes_srcu, idx);
+			srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	BUG_ON(p->pasid != pasid);
+			pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-	mutex_lock(&p->mutex);
+			mutex_lock(&p->mutex);
 
-	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
-		kfd_dbgmgr_destroy(dev->dbgmgr);
+			if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+				kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
+			pqm_uninit(&p->pqm);
 
-	pdd = kfd_get_process_device_data(dev, p);
+			pdd = kfd_get_process_device_data(dev, p);
 
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
+			if (!pdd) {
+				mutex_unlock(&p->mutex);
+				return;
+			}
 
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+			if (pdd->reset_wavefronts) {
+				dbgdev_wave_reset_wavefronts(pdd->dev, p);
+				pdd->reset_wavefronts = false;
+			}
 
-	/*
-	 * Just mark pdd as unbound, because we still need it to call
-	 * amd_iommu_unbind_pasid() in when the process exits.
-	 * We don't call amd_iommu_unbind_pasid() here
-	 * because the IOMMU called us.
-	 */
-	pdd->bound = false;
+			/*
+			 * Just mark pdd as unbound, because we still need it
+			 * to call amd_iommu_unbind_pasid() in when the
+			 * process exits.
+			 * We don't call amd_iommu_unbind_pasid() here
+			 * because the IOMMU called us.
+			 */
+			pdd->bound = false;
 
-	mutex_unlock(&p->mutex);
+			mutex_unlock(&p->mutex);
+
+			return;
+		}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 74909e72a009..884c96f50c3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.simd_count);
 
 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
 				dev->node_props.mem_banks_count,
 				dev->mem_bank_count);
 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h
index 7c2a916c1e63..5eb895fd98bf 100644
--- a/drivers/gpu/drm/amd/include/amd_pcie.h
+++ b/drivers/gpu/drm/amd/include/amd_pcie.h
@@ -37,6 +37,13 @@
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_MASK   0x0000FFFF
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_SHIFT  0
 
+/* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
+
 /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1          0x00010000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2          0x00020000
@@ -47,4 +54,11 @@
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32         0x00400000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT       16
 
+/* 1/2/4/8/16 lanes */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK (CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 6080951d539d..a74a0d2ff1ca 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -26,15 +26,6 @@
 #define AMD_MAX_USEC_TIMEOUT		100000  /* 100 ms */
 
 /*
-* Supported GPU families (aligned with amdgpu_drm.h)
-*/
-#define AMD_FAMILY_UNKNOWN              0
-#define AMD_FAMILY_CI                   120 /* Bonaire, Hawaii */
-#define AMD_FAMILY_KV                   125 /* Kaveri, Kabini, Mullins */
-#define AMD_FAMILY_VI                   130 /* Iceland, Tonga */
-#define AMD_FAMILY_CZ                   135 /* Carrizo */
-
-/*
  * Supported ASIC types
  */
 enum amd_asic_type {
@@ -120,6 +111,8 @@ enum amd_powergating_state {
 #define AMD_PG_SUPPORT_SDMA			(1 << 8)
 #define AMD_PG_SUPPORT_ACP			(1 << 9)
 #define AMD_PG_SUPPORT_SAMU			(1 << 10)
+#define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
+#define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
 
 enum amd_pm_state_type {
 	/* not used for dpm */
@@ -157,6 +150,7 @@ struct amd_ip_funcs {
 	int (*hw_init)(void *handle);
 	/* tears down the hw state */
 	int (*hw_fini)(void *handle);
+	void (*late_fini)(void *handle);
 	/* handles IP specific hw/sw changes for suspend */
 	int (*suspend)(void *handle);
 	/* handles IP specific hw/sw changes for resume */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
index 293329719bba..809759f7bb81 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
@@ -27,6 +27,7 @@
 #define mmMM_INDEX                                                              0x0
 #define mmMM_INDEX_HI                                                           0x6
 #define mmMM_DATA                                                               0x1
+#define mmCC_BIF_BX_STRAP2							0x152A
 #define mmBIF_MM_INDACCESS_CNTL                                                 0x1500
 #define mmBIF_DOORBELL_APER_EN                                                  0x1501
 #define mmBUS_CNTL                                                              0x1508
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
index ebaf67bb1589..90ff7c8a6011 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
@@ -2823,4 +2823,7 @@
 #define mmDC_EDC_CSINVOC_CNT                                                    0x3192
 #define mmDC_EDC_RESTORE_CNT                                                    0x3193
 
+#define mmGC_CAC_IND_INDEX                                                      0x129a
+#define mmGC_CAC_IND_DATA                                                       0x129b
+
 #endif /* GFX_8_0_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
index 7d722458d9f5..4070ca3a68eb 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
@@ -8730,8 +8730,6 @@
 #define RLC_GPM_STAT__DYN_CU_POWERING_DOWN__SHIFT 0x10
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE_MASK 0x20000
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE__SHIFT 0x11
-#define RLC_GPM_STAT__RESERVED_MASK 0xfc0000
-#define RLC_GPM_STAT__RESERVED__SHIFT 0x12
 #define RLC_GPM_STAT__PG_ERROR_STATUS_MASK 0xff000000
 #define RLC_GPM_STAT__PG_ERROR_STATUS__SHIFT 0x18
 #define RLC_GPU_CLOCK_32_RES_SEL__RES_SEL_MASK 0x3f
@@ -8764,8 +8762,10 @@
 #define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE__SHIFT 0x12
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE_MASK 0x80000
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE__SHIFT 0x13
-#define RLC_PG_CNTL__RESERVED1_MASK 0xf00000
-#define RLC_PG_CNTL__RESERVED1__SHIFT 0x14
+#define RLC_PG_CNTL__QUICK_PG_ENABLE_MASK 0x100000
+#define RLC_PG_CNTL__QUICK_PG_ENABLE__SHIFT 0x14
+#define RLC_PG_CNTL__RESERVED1_MASK 0xe00000
+#define RLC_PG_CNTL__RESERVED1__SHIFT 0x15
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY_MASK 0xff
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY__SHIFT 0x0
 #define RLC_GPM_THREAD_PRIORITY__THREAD1_PRIORITY_MASK 0xff00
@@ -9102,8 +9102,6 @@
 #define RLC_GPM_LOG_CONT__CONT__SHIFT 0x0
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK 0xff
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT 0x0
-#define RLC_PG_DELAY_3__RESERVED_MASK 0xffffff00
-#define RLC_PG_DELAY_3__RESERVED__SHIFT 0x8
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE_MASK 0xffffffff
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE__SHIFT 0x0
 #define RLC_GPM_INT_DISABLE_TH1__DISABLE_MASK 0xffffffff
@@ -9124,14 +9122,8 @@
 #define RLC_SRM_DEBUG_SELECT__RESERVED__SHIFT 0x8
 #define RLC_SRM_DEBUG__DATA_MASK 0xffffffff
 #define RLC_SRM_DEBUG__DATA__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_ARAM_ADDR__ADDR__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__RESERVED_MASK 0xfffffc00
-#define RLC_SRM_ARAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_ARAM_DATA__DATA_MASK 0xffffffff
 #define RLC_SRM_ARAM_DATA__DATA__SHIFT 0x0
-#define RLC_SRM_DRAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_DRAM_ADDR__ADDR__SHIFT 0x0
 #define RLC_SRM_DRAM_ADDR__RESERVED_MASK 0xfffffc00
 #define RLC_SRM_DRAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_DRAM_DATA__DATA_MASK 0xffffffff
@@ -17946,8 +17938,6 @@
 #define VGT_TESS_DISTRIBUTION__ACCUM_TRI__SHIFT 0x8
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD_MASK 0xff0000
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD__SHIFT 0x10
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT_MASK 0xff000000
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT__SHIFT 0x18
 #define VGT_TF_RING_SIZE__SIZE_MASK 0xffff
 #define VGT_TF_RING_SIZE__SIZE__SHIFT 0x0
 #define VGT_SYS_CONFIG__DUAL_CORE_EN_MASK 0x1
@@ -20502,8 +20492,6 @@
 #define DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_SQ_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_SQ_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_SQ_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_SQ_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20558,8 +20546,6 @@
 #define DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DB_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DB_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DB_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DB_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DB_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20614,8 +20600,6 @@
 #define DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TD_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TD_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TD_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TD_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TD_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20670,8 +20654,6 @@
 #define DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TCP_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TCP_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TCP_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TCP_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20726,8 +20708,6 @@
 #define DIDT_DBR_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DBR_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DBR_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DBR_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DBR_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DBR_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20773,4 +20753,84 @@
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11_MASK 0xff000000
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11__SHIFT 0x18
 
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007e
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007eL
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK   0x00000001L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT 0x00000000
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK      0x0000007eL
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK      0x00001f80L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT    0x00000001
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT    0x00000007
+
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK  0x1fffe000L
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_TCP_STALL_CTRL__UNUSED_0_MASK                 0xe0000000L
+#define DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT               0x0000001d
+
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK      0x00000001L
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK      0x00007ffeL
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK      0x1fff8000L
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT    0x00000000
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT    0x00000001
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT    0x0000000f
+
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK    0x00000fc0L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK    0x0003f000L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT  0x00000006
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT  0x0000000c
+
 #endif /* GFX_8_0_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
index 6f6fb34742d2..ec69869c55ff 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
@@ -111,6 +111,8 @@
 #define mmUVD_MIF_RECON1_ADDR_CONFIG                                            0x39c5
 #define ixUVD_MIF_SCLR_ADDR_CONFIG                                              0x4
 #define mmUVD_JPEG_ADDR_CONFIG                                                  0x3a1f
+#define mmUVD_GP_SCRATCH8                                                       0x3c0a
+#define mmUVD_GP_SCRATCH9                                                       0x3c0b
 #define mmUVD_GP_SCRATCH4                                                       0x3d38
 
 #endif /* UVD_6_0_D_H */
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 32f3e345de08..3493da5c8f0e 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5538,6 +5538,78 @@ typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_5
   ULONG  ulReserved[12];
 }ATOM_ASIC_PROFILING_INFO_V3_5;
 
+/* for Polars10/11 AVFS parameters */
+typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_6
+{
+  ATOM_COMMON_TABLE_HEADER         asHeader;
+  ULONG  ulMaxVddc;
+  ULONG  ulMinVddc;
+  USHORT usLkgEuseIndex;
+  UCHAR  ucLkgEfuseBitLSB;
+  UCHAR  ucLkgEfuseLength;
+  ULONG  ulLkgEncodeLn_MaxDivMin;
+  ULONG  ulLkgEncodeMax;
+  ULONG  ulLkgEncodeMin;
+  EFUSE_LINEAR_FUNC_PARAM sRoFuse;
+  ULONG  ulEvvDefaultVddc;
+  ULONG  ulEvvNoCalcVddc;
+  ULONG  ulSpeed_Model;
+  ULONG  ulSM_A0;
+  ULONG  ulSM_A1;
+  ULONG  ulSM_A2;
+  ULONG  ulSM_A3;
+  ULONG  ulSM_A4;
+  ULONG  ulSM_A5;
+  ULONG  ulSM_A6;
+  ULONG  ulSM_A7;
+  UCHAR  ucSM_A0_sign;
+  UCHAR  ucSM_A1_sign;
+  UCHAR  ucSM_A2_sign;
+  UCHAR  ucSM_A3_sign;
+  UCHAR  ucSM_A4_sign;
+  UCHAR  ucSM_A5_sign;
+  UCHAR  ucSM_A6_sign;
+  UCHAR  ucSM_A7_sign;
+  ULONG  ulMargin_RO_a;
+  ULONG  ulMargin_RO_b;
+  ULONG  ulMargin_RO_c;
+  ULONG  ulMargin_fixed;
+  ULONG  ulMargin_Fmax_mean;
+  ULONG  ulMargin_plat_mean;
+  ULONG  ulMargin_Fmax_sigma;
+  ULONG  ulMargin_plat_sigma;
+  ULONG  ulMargin_DC_sigma;
+  ULONG  ulLoadLineSlop;
+  ULONG  ulaTDClimitPerDPM[8];
+  ULONG  ulaNoCalcVddcPerDPM[8];
+  ULONG  ulAVFS_meanNsigma_Acontant0;
+  ULONG  ulAVFS_meanNsigma_Acontant1;
+  ULONG  ulAVFS_meanNsigma_Acontant2;
+  USHORT usAVFS_meanNsigma_DC_tol_sigma;
+  USHORT usAVFS_meanNsigma_Platform_mean;
+  USHORT usAVFS_meanNsigma_Platform_sigma;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a2;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSON_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_b;
+  USHORT usMaxVoltage_0_25mv;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSON;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSON;
+  USHORT usPSM_Age_ComFactor;
+  UCHAR  ucEnableApplyAVFS_CKS_OFF_Voltage;
+  UCHAR  ucReserved;
+}ATOM_ASIC_PROFILING_INFO_V3_6;
+
 
 typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
   ULONG  ulMaxSclkFreq;
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index a461e155a160..b86aba9d019f 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -49,6 +49,7 @@ enum cgs_ind_reg {
 	CGS_IND_REG__SMC,
 	CGS_IND_REG__UVD_CTX,
 	CGS_IND_REG__DIDT,
+	CGS_IND_REG_GC_CAC,
 	CGS_IND_REG__AUDIO_ENDPT
 };
 
@@ -112,20 +113,23 @@ enum cgs_system_info_id {
 	CGS_SYSTEM_INFO_ADAPTER_BDF_ID = 1,
 	CGS_SYSTEM_INFO_PCIE_GEN_INFO,
 	CGS_SYSTEM_INFO_PCIE_MLW,
+	CGS_SYSTEM_INFO_PCIE_DEV,
+	CGS_SYSTEM_INFO_PCIE_REV,
 	CGS_SYSTEM_INFO_CG_FLAGS,
 	CGS_SYSTEM_INFO_PG_FLAGS,
 	CGS_SYSTEM_INFO_GFX_CU_INFO,
+	CGS_SYSTEM_INFO_GFX_SE_INFO,
 	CGS_SYSTEM_INFO_ID_MAXIMUM,
 };
 
 struct cgs_system_info {
-	uint64_t       size;
-	uint64_t       info_id;
+	uint64_t			size;
+	enum cgs_system_info_id		info_id;
 	union {
-		void           *ptr;
-		uint64_t        value;
+		void			*ptr;
+		uint64_t		value;
 	};
-	uint64_t               padding[13];
+	uint64_t			padding[13];
 };
 
 /*
@@ -158,6 +162,10 @@ struct cgs_firmware_info {
 	uint16_t		feature_version;
 	uint32_t		image_size;
 	uint64_t		mc_addr;
+
+	/* only for smc firmware */
+	uint32_t		ucode_start_address;
+
 	void			*kptr;
 };
 
@@ -189,7 +197,6 @@ typedef unsigned long cgs_handle_t;
 
 struct cgs_acpi_method_argument {
 	uint32_t type;
-	uint32_t method_length;
 	uint32_t data_length;
 	union{
 		uint32_t value;
@@ -581,6 +588,9 @@ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
 				     enum cgs_ucode_id type,
 				     struct cgs_firmware_info *info);
 
+typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device,
+					 enum cgs_ucode_id type);
+
 typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
@@ -645,6 +655,7 @@ struct cgs_ops {
 	cgs_set_camera_voltages_t set_camera_voltages;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
+	cgs_rel_firmware rel_firmware;
 	/* cg pg interface*/
 	cgs_set_powergating_state set_powergating_state;
 	cgs_set_clockgating_state set_clockgating_state;
@@ -738,6 +749,8 @@ struct cgs_device
 	CGS_CALL(set_camera_voltages,dev,mask,voltages)
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
+#define cgs_rel_firmware(dev, type)	\
+	CGS_CALL(rel_firmware, dev, type)
 #define cgs_set_powergating_state(dev, block_type, state)	\
 	CGS_CALL(set_powergating_state, dev, block_type, state)
 #define cgs_set_clockgating_state(dev, block_type, state)	\
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8e345bfddb69..abbb658bdc1e 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -73,11 +73,14 @@ static int pp_sw_init(void *handle)
 
 	ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 	if (ret)
-		goto err;
+		goto err1;
 
 	pr_info("amdgpu: powerplay initialized\n");
 
 	return 0;
+err1:
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
 err:
 	pr_err("amdgpu: powerplay initialization failed\n");
 	return ret;
@@ -100,6 +103,9 @@ static int pp_sw_fini(void *handle)
 	if (hwmgr->hwmgr_func->backend_fini != NULL)
 		ret = hwmgr->hwmgr_func->backend_fini(hwmgr);
 
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
+
 	return ret;
 }
 
@@ -170,7 +176,7 @@ static int pp_hw_fini(void *handle)
 
 static bool pp_is_idle(void *handle)
 {
-	return 0;
+	return false;
 }
 
 static int pp_wait_for_idle(void *handle)
@@ -530,6 +536,10 @@ int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id, void *input,
 	case AMD_PP_EVENT_COMPLETE_INIT:
 		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
 		break;
+	case AMD_PP_EVENT_READJUST_POWER_STATE:
+		pp_handle->hwmgr->current_ps = pp_handle->hwmgr->boot_ps;
+		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	default:
 		break;
 	}
@@ -734,12 +744,12 @@ static int pp_dpm_get_pp_table(void *handle, char **table)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->get_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
-	}
+	if (!hwmgr->soft_pp_table)
+		return -EINVAL;
 
-	return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
+	*table = (char *)hwmgr->soft_pp_table;
+
+	return hwmgr->soft_pp_table_size;
 }
 
 static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
@@ -753,12 +763,23 @@ static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->set_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
+	if (!hwmgr->hardcode_pp_table) {
+		hwmgr->hardcode_pp_table =
+				kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
+
+		if (!hwmgr->hardcode_pp_table)
+			return -ENOMEM;
+
+		/* to avoid powerplay crash when hardcode pptable is empty */
+		memcpy(hwmgr->hardcode_pp_table, hwmgr->soft_pp_table,
+				hwmgr->soft_pp_table_size);
 	}
 
-	return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
+	memcpy(hwmgr->hardcode_pp_table, buf, size);
+
+	hwmgr->soft_pp_table = hwmgr->hardcode_pp_table;
+
+	return amd_powerplay_reset(handle);
 }
 
 static int pp_dpm_force_clock_level(void *handle,
@@ -800,6 +821,82 @@ static int pp_dpm_print_clock_levels(void *handle,
 	return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
 }
 
+static int pp_dpm_get_sclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_sclk_od(hwmgr);
+}
+
+static int pp_dpm_set_sclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_sclk_od(hwmgr, value);
+}
+
+static int pp_dpm_get_mclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_mclk_od(hwmgr);
+}
+
+static int pp_dpm_set_mclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_mclk_od(hwmgr, value);
+}
+
 const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.get_temperature = pp_dpm_get_temperature,
 	.load_firmware = pp_dpm_load_fw,
@@ -822,6 +919,10 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.set_pp_table = pp_dpm_set_pp_table,
 	.force_clock_level = pp_dpm_force_clock_level,
 	.print_clock_levels = pp_dpm_print_clock_levels,
+	.get_sclk_od = pp_dpm_get_sclk_od,
+	.set_sclk_od = pp_dpm_set_sclk_od,
+	.get_mclk_od = pp_dpm_get_mclk_od,
+	.set_mclk_od = pp_dpm_set_mclk_od,
 };
 
 static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -903,6 +1004,44 @@ int amd_powerplay_fini(void *handle)
 	return 0;
 }
 
+int amd_powerplay_reset(void *handle)
+{
+	struct pp_instance *instance = (struct pp_instance *)handle;
+	struct pp_eventmgr *eventmgr;
+	struct pem_event_data event_data = { {0} };
+	int ret;
+
+	if (instance == NULL)
+		return -EINVAL;
+
+	eventmgr = instance->eventmgr;
+	if (!eventmgr || !eventmgr->pp_eventmgr_fini)
+		return -EINVAL;
+
+	eventmgr->pp_eventmgr_fini(eventmgr);
+
+	ret = pp_sw_fini(handle);
+	if (ret)
+		return ret;
+
+	kfree(instance->hwmgr->ps);
+
+	ret = pp_sw_init(handle);
+	if (ret)
+		return ret;
+
+	hw_init_power_state_table(instance->hwmgr);
+
+	if (eventmgr == NULL || eventmgr->pp_eventmgr_init == NULL)
+		return -EINVAL;
+
+	ret = eventmgr->pp_eventmgr_init(eventmgr);
+	if (ret)
+		return ret;
+
+	return pem_handle_event(eventmgr, AMD_PP_EVENT_COMPLETE_INIT, &event_data);
+}
+
 /* export this function to DAL */
 
 int amd_powerplay_display_configuration_change(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index d6635cc4b0fc..635fc4b48184 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
@@ -30,7 +30,6 @@ static const pem_event_action * const initialize_event[] = {
 	system_config_tasks,
 	setup_asic_tasks,
 	enable_dynamic_state_management_tasks,
-	enable_clock_power_gatings_tasks,
 	get_2d_performance_state_tasks,
 	set_performance_state_tasks,
 	initialize_thermal_controller_tasks,
@@ -140,7 +139,6 @@ static const pem_event_action * const resume_event[] = {
 	setup_asic_tasks,
 	enable_stutter_mode_tasks, /*must do this in boot state and before SMC is started */
 	enable_dynamic_state_management_tasks,
-	enable_clock_power_gatings_tasks,
 	enable_disable_bapm_tasks,
 	initialize_thermal_controller_tasks,
 	get_2d_performance_state_tasks,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 46410e3c7349..fb88e4e5d625 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
@@ -58,9 +58,6 @@ static void pem_fini(struct pp_eventmgr *eventmgr)
 	pem_unregister_interrupts(eventmgr);
 
 	pem_handle_event(eventmgr, AMD_PP_EVENT_UNINITIALIZE, &event_data);
-
-	if (eventmgr != NULL)
-		kfree(eventmgr);
 }
 
 int eventmgr_init(struct pp_instance *handle)
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index 5cd123472db4..b6f45fd01fa6 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -132,8 +132,7 @@ int pem_task_enable_dynamic_state_management(struct pp_eventmgr *eventmgr, struc
 
 int pem_task_disable_dynamic_state_management(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
 {
-	/* TODO */
-	return 0;
+	return phm_disable_dynamic_state_management(eventmgr->hwmgr);
 }
 
 int pem_task_enable_clock_power_gatings_tasks(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index 436fc16dabb6..2028980f1ed4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -177,12 +177,12 @@ int cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 		cz_dpm_powerdown_uvd(hwmgr);
 	} else {
 		cz_dpm_powerup_uvd(hwmgr);
-		cgs_set_clockgating_state(hwmgr->device,
-						AMD_IP_BLOCK_TYPE_UVD,
-						AMD_PG_STATE_GATE);
 		cgs_set_powergating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_CG_STATE_UNGATE);
+		cgs_set_clockgating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_UVD,
+						AMD_PG_STATE_GATE);
 		cz_dpm_update_uvd_dpm(hwmgr, false);
 	}
 
@@ -206,25 +206,26 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 							AMD_IP_BLOCK_TYPE_VCE,
 							AMD_PG_STATE_GATE);
 				cz_enable_disable_vce_dpm(hwmgr, false);
-			/* TODO: to figure out why vce can't be poweroff*/
+				cz_dpm_powerdown_vce(hwmgr);
 				cz_hwmgr->vce_power_gated = true;
 			} else {
 				cz_dpm_powerup_vce(hwmgr);
 				cz_hwmgr->vce_power_gated = false;
-				cgs_set_clockgating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_GATE);
 				cgs_set_powergating_state(
 							hwmgr->device,
 							AMD_IP_BLOCK_TYPE_VCE,
 							AMD_CG_STATE_UNGATE);
+				cgs_set_clockgating_state(
+							hwmgr->device,
+							AMD_IP_BLOCK_TYPE_VCE,
+							AMD_PG_STATE_GATE);
 				cz_dpm_update_vce_dpm(hwmgr);
 				cz_enable_disable_vce_dpm(hwmgr, true);
 				return 0;
 			}
 		}
 	} else {
+		cz_hwmgr->vce_power_gated = bgate;
 		cz_dpm_update_vce_dpm(hwmgr);
 		cz_enable_disable_vce_dpm(hwmgr, !bgate);
 		return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index 1f14c477d15d..8cc0df9b534a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -1167,9 +1167,9 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cz_ps->action = cz_current_ps->action;
 
-	if ((force_high == false) && (cz_ps->action == FORCE_HIGH))
+	if (!force_high && (cz_ps->action == FORCE_HIGH))
 		cz_ps->action = CANCEL_FORCE_HIGH;
-	else if ((force_high == true) && (cz_ps->action != FORCE_HIGH))
+	else if (force_high && (cz_ps->action != FORCE_HIGH))
 		cz_ps->action = FORCE_HIGH;
 	else
 		cz_ps->action = DO_NOTHING;
@@ -1180,6 +1180,13 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
+	struct cz_hwmgr *data;
+
+	data = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
 
 	result = cz_initialize_dpm_defaults(hwmgr);
 	if (result != 0) {
@@ -1649,7 +1656,7 @@ static void cz_hw_print_display_cfg(
 	struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend);
 	uint32_t data = 0;
 
-	if (hw_data->cc6_settings.cc6_setting_changed == true) {
+	if (hw_data->cc6_settings.cc6_setting_changed) {
 
 		hw_data->cc6_settings.cc6_setting_changed = false;
 
@@ -1909,15 +1916,7 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
 
 int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct cz_hwmgr *cz_hwmgr;
-	int ret = 0;
-
-	cz_hwmgr = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
-	if (cz_hwmgr == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = cz_hwmgr;
 	hwmgr->hwmgr_func = &cz_hwmgr_funcs;
 	hwmgr->pptable_func = &pptable_funcs;
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
index e1b649bd5344..5afe82068b29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
@@ -56,7 +56,7 @@ int fiji_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 		fiji_update_uvd_dpm(hwmgr, false);
 		cgs_set_clockgating_state(hwmgr->device,
 					  AMD_IP_BLOCK_TYPE_UVD,
-					  AMD_PG_STATE_UNGATE);
+					  AMD_CG_STATE_UNGATE);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 24a16e49b571..120a9e2c3152 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -581,25 +581,24 @@ static int fiji_patch_boot_state(struct pp_hwmgr *hwmgr,
 
 static int fiji_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
 static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_hwmgr *data;
 	uint32_t i;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	bool stay_in_boot;
 	int result;
 
+	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 
@@ -633,6 +632,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE;
 
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2;
@@ -697,7 +698,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	if (0 == result) {
 		struct cgs_system_info sys_info = {0};
 
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 				FIJI_MAX_HARDWARE_POWERLEVELS;
 		hwmgr->platform_descriptor.hardwarePerformanceLevels = 2;
@@ -732,7 +733,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -741,7 +742,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -1234,6 +1235,34 @@ static int fiji_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_0, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_1, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_2, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_3, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_4, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_5, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_6, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_7, 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -1361,6 +1390,17 @@ static int fiji_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 }
 
 /**
+* Call SMC to reset S0/S1 to S1 and Reset SMIO to initial value
+*
+* @param    hwmgr  the address of the powerplay hardware manager.
+* @return   if success then 0;
+*/
+static int fiji_reset_to_default(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
+/**
 * Initial switch from ARB F0->F1
 *
 * @param    hwmgr  the address of the powerplay hardware manager.
@@ -1373,6 +1413,21 @@ static int fiji_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+static int fiji_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t tmp;
+
+	tmp = (cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9) &
+			0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return fiji_copy_and_switch_arb_sets(hwmgr,
+			tmp, MC_CG_ARB_FREQ_F0);
+}
+
 static int fiji_reset_single_dpm_table(struct pp_hwmgr *hwmgr,
 		struct fiji_single_dpm_table *dpm_table, uint32_t count)
 {
@@ -1395,7 +1450,7 @@ static void fiji_setup_pcie_table_entry(
 {
 	dpm_table->dpm_levels[index].value = pcie_gen;
 	dpm_table->dpm_levels[index].param1 = pcie_lanes;
-	dpm_table->dpm_levels[index].enabled = 1;
+	dpm_table->dpm_levels[index].enabled = true;
 }
 
 static int fiji_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
@@ -1607,7 +1662,6 @@ static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr,
 {
 	uint32_t count;
 	uint8_t index;
-	int result = 0;
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
@@ -1629,7 +1683,7 @@ static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr,
 						VOLTAGE_SCALE)) / 25);
 	}
 
-	return result;
+	return 0;
 }
 
 /**
@@ -1830,7 +1884,7 @@ static uint16_t fiji_find_closest_vddci(struct pp_hwmgr *hwmgr, uint16_t vddci)
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
@@ -3175,6 +3229,17 @@ static int fiji_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_ulv_parm *ulv = &(data->ulv);
+
+	if (ulv->ulv_supported)
+		return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+
+	return 0;
+}
+
 static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -3195,6 +3260,21 @@ static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep)) {
+		if (smum_send_msg_to_smc(hwmgr->smumgr,
+				PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to disable Master Deep Sleep switch failed!",
+					return -1);
+		}
+	}
+
+	return 0;
+}
+
 static int fiji_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -3355,6 +3435,70 @@ static int fiji_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable SCLK dpm */
+	if (!data->sclk_dpm_key_disabled)
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_DPM_Disable) == 0),
+				"Failed to disable SCLK DPM!",
+				return -1);
+
+	/* disable MCLK dpm */
+	if (!data->mclk_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_MCLKDPM_SetEnabledMask, 1) == 0),
+				"Failed to force MCLK DPM0!",
+				return -1);
+
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_MCLKDPM_Disable) == 0),
+				"Failed to disable MCLK DPM!",
+				return -1);
+	}
+
+	return 0;
+}
+
+static int fiji_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable general power management */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	/* disable sclk deep sleep */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (fiji_disable_sclk_mclk_dpm(hwmgr)) {
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
+		return -1;
+	}
+
+	PP_ASSERT_WITH_CODE(
+			(smum_send_msg_to_smc(hwmgr->smumgr,
+					PPSMC_MSG_Voltage_Cntl_Disable) == 0),
+			"Failed to disable voltage DPM during DPM Stop Function!",
+			return -1);
+
+	return 0;
+}
+
 static void fiji_set_dpm_event_sources(struct pp_hwmgr *hwmgr,
 		uint32_t sources)
 {
@@ -3413,6 +3557,23 @@ static int fiji_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return fiji_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+static int fiji_disable_auto_throttle_source(struct pp_hwmgr *hwmgr,
+		PHM_AutoThrottleSource source)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	if (data->active_auto_throttle_sources & (1 << source)) {
+		data->active_auto_throttle_sources &= ~(1 << source);
+		fiji_set_dpm_event_sources(hwmgr, data->active_auto_throttle_sources);
+	}
+	return 0;
+}
+
+static int fiji_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{
+	return fiji_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
 	int tmp_result, result = 0;
@@ -3527,6 +3688,64 @@ static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+static int fiji_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+	int tmp_result, result = 0;
+
+	tmp_result = (fiji_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	tmp_result = fiji_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = fiji_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable SMC CAC!", result = tmp_result);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	tmp_result = fiji_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable thermal auto throttle!", result = tmp_result);
+
+	tmp_result = fiji_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	tmp_result = fiji_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable deep sleep master switch!", result = tmp_result);
+
+	tmp_result = fiji_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable ULV!", result = tmp_result);
+
+	tmp_result = fiji_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to clear voting clients!", result = tmp_result);
+
+	tmp_result = fiji_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to reset to default!", result = tmp_result);
+
+	tmp_result = fiji_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to force to switch arbf0!", result = tmp_result);
+
+	return result;
+}
+
 static int fiji_force_dpm_highest(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -4169,8 +4388,9 @@ static int fiji_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to freeze SCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_FreezeLevel),
 				"Failed to freeze SCLK DPM during FreezeSclkMclkDPM Function!",
@@ -4180,8 +4400,9 @@ static int fiji_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to freeze MCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_MCLKDPM_FreezeLevel),
 				"Failed to freeze MCLK DPM during FreezeSclkMclkDPM Function!",
@@ -4351,7 +4572,6 @@ static int fiji_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 static int fiji_trim_dpm_states(struct pp_hwmgr *hwmgr,
 		const struct fiji_power_state *fiji_ps)
 {
-	int result = 0;
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -4371,7 +4591,7 @@ static int fiji_trim_dpm_states(struct pp_hwmgr *hwmgr,
 			fiji_ps->performance_levels[0].memory_clock,
 			fiji_ps->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int fiji_generate_dpm_level_enable_mask(
@@ -4630,8 +4850,9 @@ static int fiji_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to Unfreeze SCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
 			"Failed to unfreeze SCLK DPM during UnFreezeSclkMclkDPM Function!",
@@ -4641,8 +4862,9 @@ static int fiji_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
 		    "Failed to unfreeze MCLK DPM during UnFreezeSclkMclkDPM Function!",
@@ -5069,42 +5291,6 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -5274,12 +5460,96 @@ bool fiji_check_smc_update_required_for_display_configuration(struct pp_hwmgr *h
 	return is_update_required;
 }
 
+static int fiji_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct fiji_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int fiji_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct fiji_power_state  *fiji_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	fiji_ps = cast_phw_fiji_power_state(&ps->hardware);
+
+	fiji_ps->performance_levels[fiji_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int fiji_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct fiji_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int fiji_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct fiji_power_state  *fiji_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	fiji_ps = cast_phw_fiji_power_state(&ps->hardware);
+
+	fiji_ps->performance_levels[fiji_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
 
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &fiji_hwmgr_backend_fini,
 	.asic_setup = &fiji_setup_asic_task,
 	.dynamic_state_management_enable = &fiji_enable_dpm_tasks,
+	.dynamic_state_management_disable = &fiji_disable_dpm_tasks,
 	.force_dpm_level = &fiji_dpm_force_dpm_level,
 	.get_num_of_pp_table_entries = &tonga_get_number_of_powerplay_table_entries,
 	.get_power_state_size = &fiji_get_power_state_size,
@@ -5312,24 +5582,18 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.get_fan_control_mode = fiji_get_fan_control_mode,
 	.check_states_equal = fiji_check_states_equal,
 	.check_smc_update_required_for_display_configuration = fiji_check_smc_update_required_for_display_configuration,
-	.get_pp_table = fiji_get_pp_table,
-	.set_pp_table = fiji_set_pp_table,
 	.force_clock_level = fiji_force_clock_level,
 	.print_clock_levels = fiji_print_clock_levels,
+	.get_sclk_od = fiji_get_sclk_od,
+	.set_sclk_od = fiji_set_sclk_od,
+	.get_mclk_od = fiji_get_mclk_od,
+	.set_mclk_od = fiji_set_mclk_od,
 };
 
 int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr  *data;
-	int ret = 0;
-
-	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &fiji_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_fiji_thermal_initialize(hwmgr);
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 170edf5a772d..bf67c2a92c68 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -302,9 +302,6 @@ struct fiji_hwmgr {
 	bool                           pg_acp_init;
 	bool                           frtc_enabled;
 	bool                           frtc_status_changed;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
index db23a4068baf..44658451a8d2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
@@ -73,17 +73,18 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
 
 	if (!tmp) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_PowerContainment);
-
-		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_CAC);
 
 		fiji_hwmgr->fast_watermark_threshold = 100;
 
-		tmp = 1;
-		fiji_hwmgr->enable_dte_feature = tmp ? false : true;
-		fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
-		fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		if (hwmgr->powercontainment_enabled) {
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				    PHM_PlatformCaps_PowerContainment);
+			tmp = 1;
+			fiji_hwmgr->enable_dte_feature = tmp ? false : true;
+			fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
+			fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		}
 	}
 }
 
@@ -459,6 +460,23 @@ int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) && data->cac_enabled) {
+		int smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableCac));
+		PP_ASSERT_WITH_CODE((smc_result == 0),
+				"Failed to disable CAC in SMC.", result = -1);
+
+		data->cac_enabled = false;
+	}
+	return result;
+}
+
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -528,6 +546,48 @@ int fiji_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) &&
+			data->power_containment_features) {
+		int smc_result;
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_TDCLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable TDCLimit in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_DTE) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_DisableDTE));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable DTE in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable PkgPwrTracking in SMC.",
+					result = smc_result);
+		}
+		data->power_containment_features = 0;
+	}
+
+	return result;
+}
+
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
index 55e58200f33a..fec772421733 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
@@ -36,6 +36,19 @@ enum fiji_pt_config_reg_type {
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
 #define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK             0xffffffc0
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TD_CTRL0__UNUSED_0_MASK             0xffffffc0
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK            0xffffffc0
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT          0x6
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK                0xe0000000
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT              0x0000001d
+
 struct fiji_pt_config_reg {
 	uint32_t                           offset;
 	uint32_t                           mask;
@@ -58,7 +71,9 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int fiji_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int fiji_enable_power_containment(struct pp_hwmgr *hwmgr);
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr);
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
index 7a705cee0cc2..a6abe81bc843 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
@@ -59,8 +59,8 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 		       struct phm_runtime_table_header *rt_table,
 		       void *input, void *output)
 {
-	int result = 0;
-	void *temp_storage = NULL;
+	int result;
+	void *temp_storage;
 
 	if (hwmgr == NULL || rt_table == NULL) {
 		printk(KERN_ERR "[ powerplay ] Invalid Parameter!\n");
@@ -73,12 +73,13 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 			printk(KERN_ERR "[ powerplay ] Could not allocate table temporary storage\n");
 			return -ENOMEM;
 		}
+	} else {
+		temp_storage = NULL;
 	}
 
 	result = phm_run_table(hwmgr, rt_table, input, output, temp_storage);
 
-	if (NULL != temp_storage)
-		kfree(temp_storage);
+	kfree(temp_storage);
 
 	return result;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index fa208ada6892..789f98ad2615 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -154,6 +154,30 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	return ret;
 }
 
+int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
+{
+	int ret = -1;
+	bool enabled;
+
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+		PHM_PlatformCaps_TablelessHardwareInterface)) {
+		if (hwmgr->hwmgr_func->dynamic_state_management_disable)
+			ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr);
+	} else {
+		ret = phm_dispatch_table(hwmgr,
+				&(hwmgr->disable_dynamic_state_management),
+				NULL, NULL);
+	}
+
+	enabled = ret == 0 ? false : true;
+
+	cgs_notify_dpm_enabled(hwmgr->device, enabled);
+
+	return ret;
+}
+
 int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level)
 {
 	PHM_FUNC_CHECK(hwmgr);
@@ -306,11 +330,15 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 {
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+	if (display_config == NULL)
 		return -EINVAL;
 
 	hwmgr->display_config = *display_config;
-	/* to do pass other display configuration in furture */
+
+	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+		return -EINVAL;
+
+	/* TODO: pass other display configuration in the future */
 
 	if (hwmgr->hwmgr_func->store_cc6_data)
 		hwmgr->hwmgr_func->store_cc6_data(hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 1c48917da3cf..27e07624ac28 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "cgs_common.h"
 #include "power_state.h"
 #include "hwmgr.h"
@@ -58,12 +59,13 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	hwmgr->hw_revision = pp_init->rev_id;
 	hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT;
 	hwmgr->power_source = PP_PowerSource_AC;
+	hwmgr->powercontainment_enabled = pp_init->powercontainment_enabled;
 
 	switch (hwmgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_hwmgr_init(hwmgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (hwmgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_hwmgr_init(hwmgr);
@@ -93,6 +95,15 @@ int hwmgr_fini(struct pp_hwmgr *hwmgr)
 	if (hwmgr == NULL || hwmgr->ps == NULL)
 		return -EINVAL;
 
+	/* do hwmgr finish*/
+	kfree(hwmgr->hardcode_pp_table);
+
+	kfree(hwmgr->backend);
+
+	kfree(hwmgr->start_thermal_controller.function_list);
+
+	kfree(hwmgr->set_temperature_range.function_list);
+
 	kfree(hwmgr->ps);
 	kfree(hwmgr);
 	return 0;
@@ -462,7 +473,7 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 int phm_find_boot_level(void *table,
@@ -523,7 +534,7 @@ int phm_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwmgr
 
 	/* initialize vddc_dep_on_dal_pwrl table */
 	table_size = sizeof(uint32_t) + 4 * sizeof(struct phm_clock_voltage_dependency_record);
-	table_clk_vlt = (struct phm_clock_voltage_dependency_table *)kzalloc(table_size, GFP_KERNEL);
+	table_clk_vlt = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table_clk_vlt) {
 		printk(KERN_ERR "[ powerplay ] Can not allocate space for vddc_dep_on_dal_pwrl! \n");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
index 347fef127ce9..2930a3355948 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
@@ -39,6 +39,7 @@ struct phm_ppt_v1_clock_voltage_dependency_record {
 	uint8_t phases;
 	uint8_t cks_enable;
 	uint8_t cks_voffset;
+	uint32_t sclk_offset;
 };
 
 typedef struct phm_ppt_v1_clock_voltage_dependency_record phm_ppt_v1_clock_voltage_dependency_record;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
index 8f142a74ad08..b5edb5105986 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
@@ -106,11 +106,17 @@ int polaris10_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_GATE);
 		polaris10_update_uvd_dpm(hwmgr, true);
 		polaris10_phm_powerdown_uvd(hwmgr);
 	} else {
 		polaris10_phm_powerup_uvd(hwmgr);
 		polaris10_update_uvd_dpm(hwmgr, false);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_UNGATE);
 	}
 
 	return 0;
@@ -125,11 +131,19 @@ int polaris10_phm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 
 	data->vce_power_gated = bgate;
 
-	if (bgate)
+	if (bgate) {
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_VCE,
+				AMD_CG_STATE_GATE);
+		polaris10_update_vce_dpm(hwmgr, true);
 		polaris10_phm_powerdown_vce(hwmgr);
-	else
+	} else {
 		polaris10_phm_powerup_vce(hwmgr);
-
+		polaris10_update_vce_dpm(hwmgr, false);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_VCE,
+				AMD_CG_STATE_UNGATE);
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index aa6be033f21b..769636a0c5b5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -389,6 +389,34 @@ static int polaris10_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_0, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_1, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_2, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_3, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_4, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_5, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_6, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_7, 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -515,6 +543,11 @@ static int polaris10_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+static int polaris10_reset_to_default(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
 /**
 * Initial switch from ARB F0->F1
 *
@@ -528,6 +561,21 @@ static int polaris10_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+static int polaris10_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t tmp;
+
+	tmp = (cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9) &
+			0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return polaris10_copy_and_switch_arb_sets(hwmgr,
+			tmp, MC_CG_ARB_FREQ_F0);
+}
+
 static int polaris10_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -732,7 +780,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
 			table->Smio[level] |=
 				data->mvdd_voltage_table.entries[level].smio_low;
 		}
-		table->SmioMask2 = data->vddci_voltage_table.mask_low;
+		table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
 		table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count);
 	}
@@ -999,7 +1047,7 @@ static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
 				vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
 						(dep_table->entries[i].vddc -
 								(uint16_t)data->vddc_vddci_delta));
-				*voltage |= (vddci * VOLTAGE_SCALE) <<	VDDCI_SHIFT;
+				*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 			}
 
 			if (POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control)
@@ -1296,7 +1344,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	}
 
 	mem_level->MclkFrequency = clock;
-	mem_level->StutterEnable = 0;
 	mem_level->EnabledForThrottle = 1;
 	mem_level->EnabledForActivity = 0;
 	mem_level->UpHyst = 0;
@@ -1304,7 +1351,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->VoltageDownHyst = 0;
 	mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target;
 	mem_level->StutterEnable = false;
-
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	data->display_timing.num_existing_displays = info.display_count;
@@ -1358,12 +1404,12 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
 			return result;
 	}
 
-	/* in order to prevent MC activity from stutter mode to push DPM up.
+	/* In order to prevent MC activity from stutter mode to push DPM up,
 	 * the UVD change complements this by putting the MCLK in
-	 * a higher state by default such that we are not effected by
+	 * a higher state by default such that we are not affected by
 	 * up threshold or and MCLK DPM latency.
 	 */
-	levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target;
+	levels[0].ActivityLevel = 0x1f;
 	CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel);
 
 	data->smc_state_table.MemoryDpmLevelCount =
@@ -1424,22 +1470,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 
 	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
 
-	if (!data->sclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0,
-		 * already converted to SMC_UL */
-		sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_sclk,
-				table->ACPILevel.SclkFrequency,
-				&table->ACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDC voltage value "
-				"in Clock Dependency Table", );
-	} else {
-		sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
-		table->ACPILevel.MinVoltage =
-				data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0,
+	 * already converted to SMC_UL */
+	sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk,
+			sclk_frequency,
+			&table->ACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDC voltage value "
+			"in Clock Dependency Table",
+			);
+
 
 	result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency,  &(table->ACPILevel.SclkSetting));
 	PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result);
@@ -1464,24 +1507,17 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
 
-	if (!data->mclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
-		table->MemoryACPILevel.MclkFrequency =
-				data->dpm_table.mclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_mclk,
-				table->MemoryACPILevel.MclkFrequency,
-				&table->MemoryACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDCI voltage value "
-				"in Clock Dependency Table",
-				);
-	} else {
-		table->MemoryACPILevel.MclkFrequency =
-				data->vbios_boot_state.mclk_bootup_value;
-		table->MemoryACPILevel.MinVoltage =
-				data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+	table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_mclk,
+			table->MemoryACPILevel.MclkFrequency,
+			&table->MemoryACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDCI voltage value "
+			"in Clock Dependency Table",
+			);
 
 	us_mvdd = 0;
 	if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
@@ -1526,6 +1562,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->VceLevelCount = (uint8_t)(mm_table->count);
 	table->VceBootLevel = 0;
@@ -1535,9 +1572,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+
 		table->VceLevel[count].MinVoltage |=
-				((mm_table->entries[count].vddc - data->vddc_vddci_delta) *
-						VOLTAGE_SCALE) << VDDCI_SHIFT;
+				(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/*retrieve divider value for VBIOS */
@@ -1566,6 +1612,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->SamuBootLevel = 0;
 	table->SamuLevelCount = (uint8_t)(mm_table->count);
@@ -1576,8 +1623,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1660,6 +1715,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->UvdLevelCount = (uint8_t)(mm_table->count);
 	table->UvdBootLevel = 0;
@@ -1670,8 +1726,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1692,8 +1756,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
-
 	}
+
 	return result;
 }
 
@@ -1761,12 +1825,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr)
 
 static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 {
-	uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks,
-			volt_with_cks, value;
-	uint16_t clock_freq_u16;
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2,
-			volt_offset = 0;
+	uint8_t i, stretch_amount, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1778,56 +1839,47 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	 * if the part is SS or FF. if RO >= 1660MHz, part is FF.
 	 */
 	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (146 * 4));
-	efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (148 * 4));
+			ixSMU_EFUSE_0 + (67 * 4));
 	efuse &= 0xFF000000;
 	efuse = efuse >> 24;
-	efuse2 &= 0xF;
 
-	if (efuse2 == 1)
-		ro = (2300 - 1350) * efuse / 255 + 1350;
-	else
-		ro = (2500 - 1000) * efuse / 255 + 1000;
-
-	if (ro >= 1660)
-		type = 0;
-	else
-		type = 1;
+	if (hwmgr->chip_id == CHIP_POLARIS10) {
+		min = 1000;
+		max = 2300;
+	} else {
+		min = 1100;
+		max = 2100;
+	}
 
-	/* Populate Stretch amount */
-	data->smc_state_table.ClockStretcherAmount = stretch_amount;
+	ro = efuse * (max -min)/255 + min;
 
 	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-		volt_without_cks = (uint32_t)((14041 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 /
-			(4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000)));
-		volt_with_cks = (uint32_t)((13946 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 /
-			(3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000)));
+		if (hwmgr->chip_id == CHIP_POLARIS10) {
+			volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
+						(2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+			volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \
+					(2522480 - sclk_table->entries[i].clk/100 * 115764/100));
+		} else {
+			volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
+						(2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000)));
+			volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \
+					(3422454 - sclk_table->entries[i].clk/100 * (18886376/10000)));
+		}
+
 		if (volt_without_cks >= volt_with_cks)
 			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
-					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+					sclk_table->entries[i].cks_voffset) * 100 + 624) / 625);
+
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			STRETCH_ENABLE, 0x0);
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x1);
-	/* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x0);
-
+	data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6;
 	/* Populate CKS Lookup Table */
-	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
-		stretch_amount2 = 0;
-	else if (stretch_amount == 3 || stretch_amount == 4)
-		stretch_amount2 = 1;
-	else {
+	if (stretch_amount != 1 && stretch_amount != 2 && stretch_amount != 3 &&
+			stretch_amount != 4 && stretch_amount != 5) {
 		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_ClockStretcher);
 		PP_ASSERT_WITH_CODE(false,
@@ -1835,69 +1887,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 				return -EINVAL);
 	}
 
-	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL);
-	value &= 0xFFC2FF87;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][0];
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][1];
-	clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table.
-			GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100);
-	if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16
-	&& polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) {
-		/* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16;
-		/* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18;
-		/* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */
-		value |= (polaris10_clock_stretch_amount_conversion
-				[polaris10_clock_stretcher_lookup_table[stretch_amount2][3]]
-				 [stretch_amount]) << 3;
-	}
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq);
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq);
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |=
-			(polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7;
-
-	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL, value);
-
-	/* Populate DDT Lookup Table */
-	for (i = 0; i < 4; i++) {
-		/* Assign the minimum and maximum VID stored
-		 * in the last row of Clock Stretcher Voltage Table.
-		 */
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2];
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3];
-		/* Loop through each SCLK and check the frequency
-		 * to see if it lies within the frequency for clock stretcher.
-		 */
-		for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) {
-			cks_setting = 0;
-			clock_freq = PP_SMC_TO_HOST_UL(
-					data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency);
-			/* Check the allowed frequency against the sclk level[j].
-			 *  Sclk's endianness has already been converted,
-			 *  and it's in 10Khz unit,
-			 *  as opposed to Data table, which is in Mhz unit.
-			 */
-			if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) {
-				cks_setting |= 0x2;
-				if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100)
-					cks_setting |= 0x1;
-			}
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting
-							|= cks_setting << (j * 2);
-		}
-		CONVERT_FROM_HOST_TO_SMC_US(
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting);
-	}
-
 	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
 	value &= 0xFFFFFFFE;
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
@@ -1945,9 +1934,8 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) {
 		config = VR_SVI2_PLANE_2;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
-	} else if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
-		config = VR_SMIO_PATTERN_2;
-		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start +
+			offsetof(SMU74_SoftRegisters, AllowMvddSwitch), 0x1);
 	} else {
 		config = VR_STATIC_VOLTAGE;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
@@ -1956,6 +1944,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+
+int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	SMU74_Discrete_DpmTable  *table = &(data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
+	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
+		return result;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+	if (0 == result) {
+		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_VDROOP_TABLE[0].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[0].m2_shift  = 12;
+		table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_VDROOP_TABLE[1].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[1].m2_shift  = 12;
+		table->MaxVoltage                = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma      = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+		for (i = 0; i < NUM_VFT_COLUMNS; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma),
+				&tmp, data->sram_end);
+
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_meanNsigma,
+					sizeof(AVFS_meanNsigma_t),
+					data->sram_end);
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, data->sram_end);
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_SclkOffset,
+					sizeof(AVFS_Sclk_Offset_t),
+					data->sram_end);
+
+		data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+	}
+	return result;
+}
+
+
 /**
 * Initializes the SMC table and uploads it
 *
@@ -2056,6 +2128,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr)
 				"Failed to populate Clock Stretcher Data Table!",
 				return result);
 	}
+
+	result = polaris10_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;);
+
 	table->CurrSclkPllRange = 0xff;
 	table->GraphicsVoltageChangeEnable  = 1;
 	table->GraphicsThermThrottleEnable  = 1;
@@ -2229,6 +2305,17 @@ static int polaris10_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_ulv_parm *ulv = &(data->ulv);
+
+	if (ulv->ulv_supported)
+		return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+
+	return 0;
+}
+
 static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -2249,9 +2336,27 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep)) {
+		if (smum_send_msg_to_smc(hwmgr->smumgr,
+				PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to disable Master Deep Sleep switch failed!",
+					return -1);
+		}
+	}
+
+	return 0;
+}
+
 static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ offsetof(SMU74_SoftRegisters, HandshakeDisables);
 
 	/* enable SCLK dpm */
 	if (!data->sclk_dpm_key_disabled)
@@ -2262,6 +2367,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
+/* Disable UVD - SMU handshake for MCLK. */
+		soft_register_value = cgs_read_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC, handshake_disables_offset);
+		soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE;
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				handshake_disables_offset, soft_register_value);
 
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -2269,7 +2380,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -1);
 
-
 		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5);
@@ -2338,6 +2448,58 @@ static int polaris10_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	/* disable SCLK dpm */
+	if (!data->sclk_dpm_key_disabled)
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_DPM_Disable) == 0),
+				"Failed to disable SCLK DPM!",
+				return -1);
+
+	/* disable MCLK dpm */
+	if (!data->mclk_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_MCLKDPM_Disable) == 0),
+				"Failed to disable MCLK DPM!",
+				return -1);
+	}
+
+	return 0;
+}
+
+static int polaris10_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	/* disable general power management */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	/* disable sclk deep sleep */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (polaris10_disable_sclk_mclk_dpm(hwmgr)) {
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
+		return -1;
+	}
+
+	return 0;
+}
+
 static void polaris10_set_dpm_event_sources(struct pp_hwmgr *hwmgr, uint32_t sources)
 {
 	bool protection;
@@ -2395,6 +2557,23 @@ static int polaris10_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return polaris10_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+static int polaris10_disable_auto_throttle_source(struct pp_hwmgr *hwmgr,
+		PHM_AutoThrottleSource source)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	if (data->active_auto_throttle_sources & (1 << source)) {
+		data->active_auto_throttle_sources &= ~(1 << source);
+		polaris10_set_dpm_event_sources(hwmgr, data->active_auto_throttle_sources);
+	}
+	return 0;
+}
+
+static int polaris10_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{
+	return polaris10_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 int polaris10_pcie_performance_request(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -2471,6 +2650,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable VR hot GPIO interrupt!", result = tmp_result);
 
+	smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay);
+
 	tmp_result = polaris10_enable_sclk_control(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable SCLK control!", result = tmp_result);
@@ -2487,6 +2668,10 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable deep sleep master switch!", result = tmp_result);
 
+	tmp_result = polaris10_enable_didt_config(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to enable deep sleep master switch!", result = tmp_result);
+
 	tmp_result = polaris10_start_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to start DPM!", result = tmp_result);
@@ -2516,8 +2701,60 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
+	int tmp_result, result = 0;
 
-	return 0;
+	tmp_result = (polaris10_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	tmp_result = polaris10_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = polaris10_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable SMC CAC!", result = tmp_result);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	tmp_result = polaris10_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable thermal auto throttle!", result = tmp_result);
+
+	tmp_result = polaris10_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	tmp_result = polaris10_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable deep sleep master switch!", result = tmp_result);
+
+	tmp_result = polaris10_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable ULV!", result = tmp_result);
+
+	tmp_result = polaris10_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to clear voting clients!", result = tmp_result);
+
+	tmp_result = polaris10_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to reset to default!", result = tmp_result);
+
+	tmp_result = polaris10_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to force to switch arbf0!", result = tmp_result);
+
+	return result;
 }
 
 int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
@@ -2528,13 +2765,6 @@ int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -2581,17 +2811,22 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 					PHM_PlatformCaps_DynamicUVDState);
 
 	/* power tune caps Assume disabled */
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_SQRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_DBRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_TDRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_TCPRamping);
 
-	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-					PHM_PlatformCaps_PowerContainment);
+	if (hwmgr->powercontainment_enabled)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+	else
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 							PHM_PlatformCaps_CAC);
 
@@ -2606,6 +2841,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
 	if (hwmgr->chip_id == CHIP_POLARIS11)
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
@@ -2638,7 +2874,7 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint16_t vv_id;
-	uint16_t vddc = 0;
+	uint32_t vddc = 0;
 	uint16_t i, j;
 	uint32_t sclk = 0;
 	struct phm_ppt_v1_information *table_info =
@@ -2662,15 +2898,16 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 				}
 			}
 
+			if (atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
+						VOLTAGE_TYPE_VDDC,
+						sclk, vv_id, &vddc) != 0) {
+				printk(KERN_WARNING "failed to retrieving EVV voltage!\n");
+				continue;
+			}
 
-			PP_ASSERT_WITH_CODE(0 == atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
-							VOLTAGE_TYPE_VDDC, sclk, vv_id, &vddc),
-						"Error retrieving EVV voltage value!",
-						continue);
-
-
-			/* need to make sure vddc is less than 2v or else, it could burn the ASIC. */
-			PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0),
+			/* need to make sure vddc is less than 2v or else, it could burn the ASIC.
+			 * real voltage level in unit of 0.01mv */
+			PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0),
 					"Invalid VDDC value", result = -EINVAL;);
 
 			/* the voltage should not be zero nor equal to leakage ID */
@@ -2896,15 +3133,46 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr)
+{
+	struct phm_ppt_v1_information *table_info =
+		       (struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table =
+			table_info->vdd_dep_on_mclk;
+	struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+			table_info->vddc_lookup_table;
+	uint32_t i;
+
+	if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) {
+		if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000)
+			return 0;
+
+		for (i = 0; i < lookup_table->count; i++) {
+			if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) {
+				dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i;
+				return 0;
+			}
+		}
+	}
+	return 0;
+}
+
+
 int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_hwmgr *data;
 	struct pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	uint32_t temp_reg;
 	int result;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
+	data = kzalloc(sizeof(struct polaris10_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 	data->mclk_dpm0_activity_target = 0xa;
@@ -2938,6 +3206,11 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 
+	data->enable_tdc_limit_feature = true;
+	data->enable_pkg_pwr_tracking_feature = true;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+	data->mclk_stutter_mode_threshold = 40000;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
@@ -2962,8 +3235,13 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 			data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
 	}
 
+	if (table_info->cac_dtp_table->usClockStretchAmount != 0)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_ClockStretcher);
+
 	polaris10_set_features_platform_caps(hwmgr);
 
+	polaris10_patch_voltage_workaround(hwmgr);
 	polaris10_init_dpm_defaults(hwmgr);
 
 	/* Get leakage voltage based on leakage ID. */
@@ -2983,7 +3261,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	if (0 == result) {
 		struct cgs_system_info sys_info = {0};
 
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 							POLARIS10_MAX_HARDWARE_POWERLEVELS;
@@ -3068,7 +3346,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -3077,7 +3355,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 
@@ -3366,6 +3644,7 @@ static int polaris10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				    hwmgr->platform_descriptor.platformCaps,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
+
 	disable_mclk_switching = (1 < info.display_count) ||
 				    disable_mclk_switching_for_frame_lock;
 
@@ -3520,10 +3799,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	ATOM_Tonga_State *state_entry = (ATOM_Tonga_State *)state;
 	ATOM_Tonga_POWERPLAYTABLE *powerplay_table =
 			(ATOM_Tonga_POWERPLAYTABLE *)pp_table;
-	ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-			(ATOM_Tonga_SCLK_Dependency_Table *)
+	PPTable_Generic_SubTable_Header *sclk_dep_table =
+			(PPTable_Generic_SubTable_Header *)
 			(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
+
 	ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 			(ATOM_Tonga_MCLK_Dependency_Table *)
 			(((unsigned long)powerplay_table) +
@@ -3575,7 +3855,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	/* Performance levels are arranged from low to high. */
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexLow].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexLow].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexLow].ulSclk;
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenLow);
@@ -3586,8 +3870,14 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 			[polaris10_power_state->performance_level_count++]);
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexHigh].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexHigh].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexHigh].ulSclk;
+
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenHigh);
 	performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap,
@@ -3645,7 +3935,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 		switch (state->classification.ui_label) {
 		case PP_StateUILabel_Performance:
 			data->use_pcie_performance_levels = true;
-
 			for (i = 0; i < ps->performance_level_count; i++) {
 				if (data->pcie_gen_performance.max <
 						ps->performance_levels[i].pcie_gen)
@@ -3661,7 +3950,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.max =
 							ps->performance_levels[i].pcie_lane;
-
 				if (data->pcie_lane_performance.min >
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.min =
@@ -3861,8 +4149,8 @@ static int polaris10_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to freeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_FreezeLevel),
@@ -3873,8 +4161,8 @@ static int polaris10_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to freeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_MCLKDPM_FreezeLevel),
@@ -4034,7 +4322,6 @@ static int polaris10_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 static int polaris10_trim_dpm_states(struct pp_hwmgr *hwmgr,
 		const struct polaris10_power_state *polaris10_ps)
 {
-	int result = 0;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -4054,7 +4341,7 @@ static int polaris10_trim_dpm_states(struct pp_hwmgr *hwmgr,
 			polaris10_ps->performance_levels[0].memory_clock,
 			polaris10_ps->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int polaris10_generate_dpm_level_enable_mask(
@@ -4137,25 +4424,20 @@ int polaris10_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 	return polaris10_enable_disable_uvd_dpm(hwmgr, !bgate);
 }
 
-static int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input)
+int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
-	const struct phm_set_power_state_input *states =
-			(const struct phm_set_power_state_input *)input;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	const struct polaris10_power_state *polaris10_nps =
-			cast_const_phw_polaris10_power_state(states->pnew_state);
-	const struct polaris10_power_state *polaris10_cps =
-			cast_const_phw_polaris10_power_state(states->pcurrent_state);
-
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
-	if (polaris10_nps->vce_clks.evclk > 0 &&
-	(polaris10_cps == NULL || polaris10_cps->vce_clks.evclk == 0)) {
-
-		data->smc_state_table.VceBootLevel =
+	if (!bgate) {
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+						PHM_PlatformCaps_StablePState))
+			data->smc_state_table.VceBootLevel =
 				(uint8_t) (table_info->mm_dep_table->count - 1);
+		else
+			data->smc_state_table.VceBootLevel = 0;
 
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, VceBootLevel);
@@ -4168,18 +4450,14 @@ static int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input)
 		cgs_write_ind_register(hwmgr->device,
 				CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
 
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) {
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
 			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
 					PPSMC_MSG_VCEDPM_SetEnabledMask,
 					(uint32_t)1 << data->smc_state_table.VceBootLevel);
-
-			polaris10_enable_disable_vce_dpm(hwmgr, true);
-		} else if (polaris10_nps->vce_clks.evclk == 0 &&
-				polaris10_cps != NULL &&
-				polaris10_cps->vce_clks.evclk > 0)
-			polaris10_enable_disable_vce_dpm(hwmgr, false);
 	}
 
+	polaris10_enable_disable_vce_dpm(hwmgr, !bgate);
+
 	return 0;
 }
 
@@ -4187,12 +4465,9 @@ int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
 	if (!bgate) {
-		data->smc_state_table.SamuBootLevel =
-				(uint8_t) (table_info->mm_dep_table->count - 1);
+		data->smc_state_table.SamuBootLevel = 0;
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, SamuBootLevel);
 		mm_boot_level_offset /= 4;
@@ -4267,8 +4542,8 @@ static int polaris10_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to Unfreeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
@@ -4279,8 +4554,8 @@ static int polaris10_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
@@ -4327,6 +4602,17 @@ static int polaris10_notify_link_speed_change_after_state_change(
 	return 0;
 }
 
+static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+		(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
+}
+
+
+
 static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
 {
 	int tmp_result, result = 0;
@@ -4360,11 +4646,6 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to generate DPM level enabled mask!",
 			result = tmp_result);
 
-	tmp_result = polaris10_update_vce_dpm(hwmgr, input);
-	PP_ASSERT_WITH_CODE((0 == tmp_result),
-			"Failed to update VCE DPM!",
-			result = tmp_result);
-
 	tmp_result = polaris10_update_sclk_threshold(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to update SCLK threshold!",
@@ -4375,6 +4656,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to program memory timing parameters!",
 			result = tmp_result);
 
+	tmp_result = polaris10_notify_smc_display(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == tmp_result),
+			"Failed to notify smc display settings!",
+			result = tmp_result);
+
 	tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to unfreeze SCLK MCLK DPM!",
@@ -4409,6 +4695,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_
 			PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm);
 }
 
+
 int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
 {
 	PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay;
@@ -4428,8 +4715,7 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm
 
 	if (num_active_displays > 1)  /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */
 		polaris10_notify_smc_display_change(hwmgr, false);
-	else
-		polaris10_notify_smc_display_change(hwmgr, true);
+
 
 	return 0;
 }
@@ -4470,6 +4756,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 	frame_time_in_us = 1000000 / refresh_rate;
 
 	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	data->frame_time_x2 = frame_time_in_us * 2 / 100;
+
 	display_gap2 = pre_vbi_time_in_us * (ref_clock / 100);
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2);
@@ -4478,7 +4766,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us));
 
-	polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0);
 
 	return 0;
 }
@@ -4591,7 +4878,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr)
 		return 0;
 	}
 
-	data->need_long_memory_training = true;
+	data->need_long_memory_training = false;
 
 /*
  *	PPMCME_FirmwareDescriptorEntry *pfd = NULL;
@@ -4721,42 +5008,6 @@ int polaris10_setup_asic_task(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
-static int polaris10_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int polaris10_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int polaris10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -4899,6 +5150,89 @@ static int polaris10_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
+static int polaris10_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct polaris10_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int polaris10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct polaris10_power_state  *polaris10_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	polaris10_ps = cast_phw_polaris10_power_state(&ps->hardware);
+
+	polaris10_ps->performance_levels[polaris10_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int polaris10_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct polaris10_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int polaris10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct polaris10_power_state  *polaris10_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	polaris10_ps = cast_phw_polaris10_power_state(&ps->hardware);
+
+	polaris10_ps->performance_levels[polaris10_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
 static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.backend_init = &polaris10_hwmgr_backend_init,
 	.backend_fini = &polaris10_hwmgr_backend_fini,
@@ -4937,22 +5271,17 @@ static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.check_states_equal = polaris10_check_states_equal,
 	.set_fan_control_mode = polaris10_set_fan_control_mode,
 	.get_fan_control_mode = polaris10_get_fan_control_mode,
-	.get_pp_table = polaris10_get_pp_table,
-	.set_pp_table = polaris10_set_pp_table,
 	.force_clock_level = polaris10_force_clock_level,
 	.print_clock_levels = polaris10_print_clock_levels,
 	.enable_per_cu_power_gating = polaris10_phm_enable_per_cu_power_gating,
+	.get_sclk_od = polaris10_get_sclk_od,
+	.set_sclk_od = polaris10_set_sclk_od,
+	.get_mclk_od = polaris10_get_mclk_od,
+	.set_mclk_od = polaris10_set_mclk_od,
 };
 
 int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr  *data;
-
-	data = kzalloc (sizeof(struct polaris10_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &polaris10_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_polaris10_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index beedf35cbfa6..33c33947e827 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -309,9 +309,9 @@ struct polaris10_hwmgr {
 	uint32_t                           up_hyst;
 	uint32_t disable_dpm_mask;
 	bool apply_optimized_settings;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
+	uint32_t                              avfs_vdroop_override_setting;
+	bool                                  apply_avfs_cks_off_voltage;
+	uint32_t                              frame_time_x2;
 };
 
 /* To convert to Q8.8 format for firmware */
@@ -352,6 +352,6 @@ int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr);
 int polaris10_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int polaris10_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable);
-
+int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
index 0b99ab3ba0c5..b9cb240a135d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
@@ -28,10 +28,360 @@
 #include "polaris10_smumgr.h"
 #include "smu74_discrete.h"
 #include "pp_debug.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "oss/oss_3_0_sh_mask.h"
 
 #define VOLTAGE_SCALE  4
 #define POWERTUNE_DEFAULT_SET_MAX    1
 
+uint32_t DIDTBlock_Info = SQ_IR_MASK | TCP_IR_MASK | TD_PCC_MASK;
+
+struct polaris10_pt_config_reg GCCACConfig_Polaris10[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x04060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x000E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x008E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x010E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x018E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x020E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg GCCACConfig_Polaris11[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x04060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x000E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x008E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x010E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x018E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x020E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg DIDTConfig_Polaris10[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0009,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0009,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__UNUSED_0_MASK,                       DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg DIDTConfig_Polaris11[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0008,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0008,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__UNUSED_0_MASK,                       DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   0xFFFFFFFF  }
+};
+
 static const struct polaris10_pt_defaults polaris10_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = {
 	/* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt,
 	 * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */
@@ -209,6 +559,187 @@ static int polaris10_min_max_vgnb_lpml_id_from_bapm_vddc(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
+{
+
+	uint32_t en = enable ? 1 : 0;
+	int32_t result = 0;
+	uint32_t data;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0);
+		data &= ~DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0, data);
+		DIDTBlock_Info &= ~SQ_Enable_MASK;
+		DIDTBlock_Info |= en << SQ_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0);
+		data &= ~DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0, data);
+		DIDTBlock_Info &= ~DB_Enable_MASK;
+		DIDTBlock_Info |= en << DB_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0);
+		data &= ~DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0, data);
+		DIDTBlock_Info &= ~TD_Enable_MASK;
+		DIDTBlock_Info |= en << TD_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0);
+		data &= ~DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0, data);
+		DIDTBlock_Info &= ~TCP_Enable_MASK;
+		DIDTBlock_Info |= en << TCP_Enable_SHIFT;
+	}
+
+	if (enable)
+		result = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_Didt_Block_Function, DIDTBlock_Info);
+
+	return result;
+}
+
+static int polaris10_program_pt_config_registers(struct pp_hwmgr *hwmgr,
+				struct polaris10_pt_config_reg *cac_config_regs)
+{
+	struct polaris10_pt_config_reg *config_regs = cac_config_regs;
+	uint32_t cache = 0;
+	uint32_t data = 0;
+
+	PP_ASSERT_WITH_CODE((config_regs != NULL), "Invalid config register table.", return -EINVAL);
+
+	while (config_regs->offset != 0xFFFFFFFF) {
+		if (config_regs->type == POLARIS10_CONFIGREG_CACHE)
+			cache |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+		else {
+			switch (config_regs->type) {
+			case POLARIS10_CONFIGREG_SMC_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, config_regs->offset);
+				break;
+
+			case POLARIS10_CONFIGREG_DIDT_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset);
+				break;
+
+			case POLARIS10_CONFIGREG_GC_CAC_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset);
+				break;
+
+			default:
+				data = cgs_read_register(hwmgr->device, config_regs->offset);
+				break;
+			}
+
+			data &= ~config_regs->mask;
+			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+			data |= cache;
+
+			switch (config_regs->type) {
+			case POLARIS10_CONFIGREG_SMC_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, config_regs->offset, data);
+				break;
+
+			case POLARIS10_CONFIGREG_DIDT_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset, data);
+				break;
+
+			case POLARIS10_CONFIGREG_GC_CAC_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset, data);
+				break;
+
+			default:
+				cgs_write_register(hwmgr->device, config_regs->offset, data);
+				break;
+			}
+			cache = 0;
+		}
+
+		config_regs++;
+	}
+
+	return 0;
+}
+
+int polaris10_enable_didt_config(struct pp_hwmgr *hwmgr)
+{
+	int result;
+	uint32_t num_se = 0;
+	uint32_t count, value, value2;
+	struct cgs_system_info sys_info = {0};
+
+	sys_info.size = sizeof(struct cgs_system_info);
+	sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO;
+	result = cgs_query_system_info(hwmgr->device, &sys_info);
+
+
+	if (result == 0)
+		num_se = sys_info.value;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+
+		/* TO DO Pre DIDT disable clock gating */
+		value = 0;
+		value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX);
+		for (count = 0; count < num_se; count++) {
+			value = SYS_GRBM_GFX_INDEX_DATA__INSTANCE_BROADCAST_WRITES_MASK
+				| SYS_GRBM_GFX_INDEX_DATA__SH_BROADCAST_WRITES_MASK
+				| (count << SYS_GRBM_GFX_INDEX_DATA__SE_INDEX__SHIFT);
+			cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value);
+
+			if (hwmgr->chip_id == CHIP_POLARIS10) {
+				result = polaris10_program_pt_config_registers(hwmgr, GCCACConfig_Polaris10);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				result = polaris10_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+			} else if (hwmgr->chip_id == CHIP_POLARIS11) {
+				result = polaris10_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				result = polaris10_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+			}
+		}
+		cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2);
+
+		result = polaris10_enable_didt(hwmgr, true);
+		PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", return result);
+
+		/* TO DO Post DIDT enable clock gating */
+	}
+
+	return 0;
+}
+
+int polaris10_disable_didt_config(struct pp_hwmgr *hwmgr)
+{
+	int result;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+		/* TO DO Pre DIDT disable clock gating */
+
+		result = polaris10_enable_didt(hwmgr, false);
+		PP_ASSERT_WITH_CODE((result == 0), "Post DIDT enable clock gating failed.", return result);
+		/* TO DO Post DIDT enable clock gating */
+	}
+
+	return 0;
+}
+
+
 static int polaris10_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -286,7 +817,7 @@ int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr)
 
 		if (polaris10_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset,
 				(uint8_t *)&data->power_tune_table,
-				sizeof(struct SMU74_Discrete_PmFuses), data->sram_end))
+				(sizeof(struct SMU74_Discrete_PmFuses) - 92), data->sram_end))
 			PP_ASSERT_WITH_CODE(false,
 					"Attempt to download PmFuseTable Failed!",
 					return -EINVAL);
@@ -312,6 +843,23 @@ int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) && data->cac_enabled) {
+		int smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableCac));
+		PP_ASSERT_WITH_CODE((smc_result == 0),
+				"Failed to disable CAC in SMC.", result = -1);
+
+		data->cac_enabled = false;
+	}
+	return result;
+}
+
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -373,6 +921,48 @@ int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) &&
+			data->power_containment_features) {
+		int smc_result;
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_TDCLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable TDCLimit in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_DTE) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_DisableDTE));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable DTE in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable PkgPwrTracking in SMC.",
+					result = smc_result);
+		}
+		data->power_containment_features = 0;
+	}
+
+	return result;
+}
+
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
index 68bc1cb6d40c..bc78e28f010d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
@@ -27,15 +27,37 @@ enum polaris10_pt_config_reg_type {
 	POLARIS10_CONFIGREG_MMR = 0,
 	POLARIS10_CONFIGREG_SMC_IND,
 	POLARIS10_CONFIGREG_DIDT_IND,
+	POLARIS10_CONFIGREG_GC_CAC_IND,
 	POLARIS10_CONFIGREG_CACHE,
 	POLARIS10_CONFIGREG_MAX
 };
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK    0xfffc0000
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT  0x12
+#define DIDT_TD_CTRL0__UNUSED_0_MASK    0xfffc0000
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT  0x12
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK   0xfffc0000
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT 0x12
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK                 0xc0000000
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001e
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK                 0xc0000000
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001e
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK                0xc0000000
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT              0x0000001e
+
 /* PowerContainment Features */
 #define POWERCONTAINMENT_FEATURE_DTE             0x00000001
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
 #define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
 
+#define ixGC_CAC_CNTL 0x0000
+#define ixDIDT_SQ_STALL_CTRL 0x0004
+#define  ixDIDT_SQ_TUNING_CTRL 0x0005
+#define ixDIDT_TD_STALL_CTRL 0x0044
+#define ixDIDT_TD_TUNING_CTRL 0x0045
+#define ixDIDT_TCP_STALL_CTRL 0x0064
+#define ixDIDT_TCP_TUNING_CTRL 0x0065
+
 struct polaris10_pt_config_reg {
 	uint32_t                           offset;
 	uint32_t                           mask;
@@ -62,9 +84,11 @@ void polaris10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int polaris10_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr);
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr);
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr);
-
+int polaris10_enable_didt_config(struct pp_hwmgr *hwmgr);
 #endif  /* POLARIS10_POWERTUNE_H */
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
index aba167f7d167..b206632d4650 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
@@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr,
 	int ret;
 	struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 
-	if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS)
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
 		return 0;
 
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+			PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+
 	ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ?
 			0 : -1;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index 58742e0d1492..1944d289f846 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
 	return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
 }
 
+bool acpi_atcs_notify_pcie_device_ready(void *device)
+{
+	int32_t temp_buffer = 1;
+
+	return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
+				ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
+						&temp_buffer,
+						NULL,
+						0,
+						sizeof(temp_buffer),
+						0);
+}
+
+
 int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 {
 	struct atcs_pref_req_input atcs_input;
@@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 	int result;
 	struct cgs_system_info info = {0};
 
-	if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
+	if (acpi_atcs_notify_pcie_device_ready(device))
 		return -EINVAL;
 
 	info.size = sizeof(struct cgs_system_info);
@@ -77,7 +91,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 						ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
 						&atcs_input,
 						&atcs_output,
-						0,
+						1,
 						sizeof(atcs_input),
 						sizeof(atcs_output));
 		if (result != 0)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index da9f5f1b6dc2..26f3e30d0fef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -179,13 +179,12 @@ int atomctrl_set_engine_dram_timings_rv770(
 
 	/* They are both in 10KHz Units. */
 	engine_clock_parameters.ulTargetEngineClock =
-		(uint32_t) engine_clock & SET_CLOCK_FREQ_MASK;
-	engine_clock_parameters.ulTargetEngineClock |=
-		(COMPUTE_ENGINE_PLL_PARAM << 24);
+		cpu_to_le32((engine_clock & SET_CLOCK_FREQ_MASK) |
+			    ((COMPUTE_ENGINE_PLL_PARAM << 24)));
 
 	/* in 10 khz units.*/
 	engine_clock_parameters.sReserved.ulClock =
-		(uint32_t) memory_clock & SET_CLOCK_FREQ_MASK;
+		cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK);
 	return cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
 			&engine_clock_parameters);
@@ -252,7 +251,7 @@ int atomctrl_get_memory_pll_dividers_si(
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters;
 	int result;
 
-	mpll_parameters.ulClock = (uint32_t) clock_value;
+	mpll_parameters.ulClock = cpu_to_le32(clock_value);
 	mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 1 : 0);
 
 	result = cgs_atom_exec_cmd_table
@@ -262,9 +261,9 @@ int atomctrl_get_memory_pll_dividers_si(
 
 	if (0 == result) {
 		mpll_param->mpll_fb_divider.clk_frac =
-			mpll_parameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(mpll_parameters.ulFbDiv.usFbDivFrac);
 		mpll_param->mpll_fb_divider.cl_kf =
-			mpll_parameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(mpll_parameters.ulFbDiv.usFbDiv);
 		mpll_param->mpll_post_divider =
 			(uint32_t)mpll_parameters.ucPostDiv;
 		mpll_param->vco_mode =
@@ -300,7 +299,7 @@ int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters;
 	int result;
 
-	mpll_parameters.ulClock.ulClock = (uint32_t)clock_value;
+	mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
@@ -320,7 +319,7 @@ int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters;
 	int result;
 
-	pll_parameters.ulClock = clock_value;
+	pll_parameters.ulClock = cpu_to_le32(clock_value);
 
 	result = cgs_atom_exec_cmd_table
 		(hwmgr->device,
@@ -329,7 +328,7 @@ int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 
 	if (0 == result) {
 		dividers->pll_post_divider = pll_parameters.ucPostDiv;
-		dividers->real_clock = pll_parameters.ulClock;
+		dividers->real_clock = le32_to_cpu(pll_parameters.ulClock);
 	}
 
 	return result;
@@ -343,7 +342,7 @@ int atomctrl_get_engine_pll_dividers_vi(
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
 	result = cgs_atom_exec_cmd_table
@@ -355,12 +354,12 @@ int atomctrl_get_engine_pll_dividers_vi(
 		dividers->pll_post_divider =
 			pll_patameters.ulClock.ucPostDiv;
 		dividers->real_clock =
-			pll_patameters.ulClock.ulClock;
+			le32_to_cpu(pll_patameters.ulClock.ulClock);
 
 		dividers->ul_fb_div.ul_fb_div_frac =
-			pll_patameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDivFrac);
 		dividers->ul_fb_div.ul_fb_div =
-			pll_patameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDiv);
 
 		dividers->uc_pll_ref_div =
 			pll_patameters.ucPllRefDiv;
@@ -380,7 +379,7 @@ int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr,
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
 	result = cgs_atom_exec_cmd_table
@@ -412,7 +411,7 @@ int atomctrl_get_dfs_pll_dividers_vi(
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv =
 		COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK;
 
@@ -425,12 +424,12 @@ int atomctrl_get_dfs_pll_dividers_vi(
 		dividers->pll_post_divider =
 			pll_patameters.ulClock.ucPostDiv;
 		dividers->real_clock =
-			pll_patameters.ulClock.ulClock;
+			le32_to_cpu(pll_patameters.ulClock.ulClock);
 
 		dividers->ul_fb_div.ul_fb_div_frac =
-			pll_patameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDivFrac);
 		dividers->ul_fb_div.ul_fb_div =
-			pll_patameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDiv);
 
 		dividers->uc_pll_ref_div =
 			pll_patameters.ucPllRefDiv;
@@ -519,13 +518,13 @@ int atomctrl_get_voltage_table_v3(
 
 	for (i = 0; i < voltage_object->asGpioVoltageObj.ucGpioEntryNum; i++) {
 		voltage_table->entries[i].value =
-			voltage_object->asGpioVoltageObj.asVolGpioLut[i].usVoltageValue;
+			le16_to_cpu(voltage_object->asGpioVoltageObj.asVolGpioLut[i].usVoltageValue);
 		voltage_table->entries[i].smio_low =
-			voltage_object->asGpioVoltageObj.asVolGpioLut[i].ulVoltageId;
+			le32_to_cpu(voltage_object->asGpioVoltageObj.asVolGpioLut[i].ulVoltageId);
 	}
 
 	voltage_table->mask_low    =
-		voltage_object->asGpioVoltageObj.ulGpioMaskVal;
+		le32_to_cpu(voltage_object->asGpioVoltageObj.ulGpioMaskVal);
 	voltage_table->count      =
 		voltage_object->asGpioVoltageObj.ucGpioEntryNum;
 	voltage_table->phase_delay =
@@ -552,13 +551,13 @@ static bool atomctrl_lookup_gpio_pin(
 				pin_assignment->ucGpioPinBitShift;
 			gpio_pin_assignment->us_gpio_pin_aindex =
 				le16_to_cpu(pin_assignment->usGpioPin_AIndex);
-			return false;
+			return true;
 		}
 
 		offset += offsetof(ATOM_GPIO_PIN_ASSIGNMENT, ucGPIO_ID) + 1;
 	}
 
-	return true;
+	return false;
 }
 
 /**
@@ -592,12 +591,12 @@ bool atomctrl_get_pp_assign_pin(
 		const uint32_t pinId,
 		pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment)
 {
-	bool bRet = 0;
+	bool bRet = false;
 	ATOM_GPIO_PIN_LUT *gpio_lookup_table =
 		get_gpio_lookup_table(hwmgr->device);
 
 	PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table),
-			"Could not find GPIO lookup Table in BIOS.", return -1);
+			"Could not find GPIO lookup Table in BIOS.", return false);
 
 	bRet = atomctrl_lookup_gpio_pin(gpio_lookup_table, pinId,
 		gpio_pin_assignment);
@@ -650,8 +649,8 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 		return -1;
 
 	if (getASICProfilingInfo->asHeader.ucTableFormatRevision < 3 ||
-			(getASICProfilingInfo->asHeader.ucTableFormatRevision == 3 &&
-			getASICProfilingInfo->asHeader.ucTableContentRevision < 4))
+	    (getASICProfilingInfo->asHeader.ucTableFormatRevision == 3 &&
+	     getASICProfilingInfo->asHeader.ucTableContentRevision < 4))
 		return -1;
 
 	/*-----------------------------------------------------------
@@ -662,37 +661,37 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 
 	switch (dpm_level) {
 	case 1:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm1);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM1, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm1));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM1), 1000);
 		break;
 	case 2:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm2);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM2, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm2));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM2), 1000);
 		break;
 	case 3:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm3);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM3, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm3));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM3), 1000);
 		break;
 	case 4:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm4);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM4, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm4));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM4), 1000);
 		break;
 	case 5:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm5);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM5, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm5));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM5), 1000);
 		break;
 	case 6:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm6);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM6, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm6));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM6), 1000);
 		break;
 	case 7:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm7);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM7, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm7));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM7), 1000);
 		break;
 	default:
 		printk(KERN_ERR "DPM Level not supported\n");
 		fPowerDPMx = Convert_ULONG_ToFraction(1);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM0, 1000);
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM0), 1000);
 	}
 
 	/*-------------------------
@@ -716,9 +715,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 		return result;
 
 	/* Finally, the actual fuse value */
-	ul_RO_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sRO_fuse.ulEfuseMin, 1);
-	fRange = GetScaledFraction(sRO_fuse.ulEfuseEncodeRange, 1);
+	ul_RO_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sRO_fuse.ulEfuseMin), 1);
+	fRange = GetScaledFraction(le32_to_cpu(sRO_fuse.ulEfuseEncodeRange), 1);
 	fRO_fused = fDecodeLinearFuse(ul_RO_fused, fMin, fRange, sRO_fuse.ucEfuseLength);
 
 	sCACm_fuse = getASICProfilingInfo->sCACm;
@@ -736,9 +735,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_CACm_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sCACm_fuse.ulEfuseMin, 1000);
-	fRange = GetScaledFraction(sCACm_fuse.ulEfuseEncodeRange, 1000);
+	ul_CACm_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sCACm_fuse.ulEfuseMin), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sCACm_fuse.ulEfuseEncodeRange), 1000);
 
 	fCACm_fused = fDecodeLinearFuse(ul_CACm_fused, fMin, fRange, sCACm_fuse.ucEfuseLength);
 
@@ -756,9 +755,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_CACb_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sCACb_fuse.ulEfuseMin, 1000);
-	fRange = GetScaledFraction(sCACb_fuse.ulEfuseEncodeRange, 1000);
+	ul_CACb_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sCACb_fuse.ulEfuseMin), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sCACb_fuse.ulEfuseEncodeRange), 1000);
 
 	fCACb_fused = fDecodeLinearFuse(ul_CACb_fused, fMin, fRange, sCACb_fuse.ucEfuseLength);
 
@@ -777,9 +776,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kt_Beta_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKt_Beta_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction(sKt_Beta_fuse.ulEfuseEncodeRange, 1000);
+	ul_Kt_Beta_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKt_Beta_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sKt_Beta_fuse.ulEfuseEncodeRange), 1000);
 
 	fKt_Beta_fused = fDecodeLogisticFuse(ul_Kt_Beta_fused,
 			fAverage, fRange, sKt_Beta_fuse.ucEfuseLength);
@@ -798,9 +797,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kv_m_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKv_m_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction((sKv_m_fuse.ulEfuseEncodeRange & 0x7fffffff), 1000);
+	ul_Kv_m_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKv_m_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction((le32_to_cpu(sKv_m_fuse.ulEfuseEncodeRange) & 0x7fffffff), 1000);
 	fRange = fMultiply(fRange, ConvertToFraction(-1));
 
 	fKv_m_fused = fDecodeLogisticFuse(ul_Kv_m_fused,
@@ -820,9 +819,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kv_b_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKv_b_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction(sKv_b_fuse.ulEfuseEncodeRange, 1000);
+	ul_Kv_b_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKv_b_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sKv_b_fuse.ulEfuseEncodeRange), 1000);
 
 	fKv_b_fused = fDecodeLogisticFuse(ul_Kv_b_fused,
 			fAverage, fRange, sKv_b_fuse.ucEfuseLength);
@@ -851,9 +850,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_FT_Lkg_V0NORM = sOutput_FuseValues.ulEfuseValue;
-	fLn_MaxDivMin = GetScaledFraction(getASICProfilingInfo->ulLkgEncodeLn_MaxDivMin, 10000);
-	fMin = GetScaledFraction(getASICProfilingInfo->ulLkgEncodeMin, 10000);
+	ul_FT_Lkg_V0NORM = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fLn_MaxDivMin = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLkgEncodeLn_MaxDivMin), 10000);
+	fMin = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLkgEncodeMin), 10000);
 
 	fFT_Lkg_V0NORM = fDecodeLeakageID(ul_FT_Lkg_V0NORM,
 			fLn_MaxDivMin, fMin, getASICProfilingInfo->ucLkgEfuseLength);
@@ -863,40 +862,40 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	 * PART 2 - Grabbing all required values
 	 *-------------------------------------------
 	 */
-	fSM_A0 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A0, 1000000),
+	fSM_A0 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A0), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A0_sign)));
-	fSM_A1 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A1, 1000000),
+	fSM_A1 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A1), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A1_sign)));
-	fSM_A2 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A2, 100000),
+	fSM_A2 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A2), 100000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A2_sign)));
-	fSM_A3 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A3, 1000000),
+	fSM_A3 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A3), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A3_sign)));
-	fSM_A4 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A4, 1000000),
+	fSM_A4 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A4), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A4_sign)));
-	fSM_A5 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A5, 1000),
+	fSM_A5 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A5), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A5_sign)));
-	fSM_A6 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A6, 1000),
+	fSM_A6 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A6), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A6_sign)));
-	fSM_A7 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A7, 1000),
+	fSM_A7 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A7), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A7_sign)));
 
-	fMargin_RO_a = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_a);
-	fMargin_RO_b = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_b);
-	fMargin_RO_c = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_c);
+	fMargin_RO_a = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_a));
+	fMargin_RO_b = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_b));
+	fMargin_RO_c = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_c));
 
-	fMargin_fixed = ConvertToFraction(getASICProfilingInfo->ulMargin_fixed);
+	fMargin_fixed = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_fixed));
 
 	fMargin_FMAX_mean = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_Fmax_mean, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_Fmax_mean), 10000);
 	fMargin_Plat_mean = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_plat_mean, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_plat_mean), 10000);
 	fMargin_FMAX_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_Fmax_sigma, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_Fmax_sigma), 10000);
 	fMargin_Plat_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_plat_sigma, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_plat_sigma), 10000);
 
 	fMargin_DC_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_DC_sigma, 100);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_DC_sigma), 100);
 	fMargin_DC_sigma = fDivide(fMargin_DC_sigma, ConvertToFraction(1000));
 
 	fCACm_fused = fDivide(fCACm_fused, ConvertToFraction(100));
@@ -908,14 +907,14 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	fSclk = GetScaledFraction(sclk, 100);
 
 	fV_max = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulMaxVddc, 1000), ConvertToFraction(4));
-	fT_prod = GetScaledFraction(getASICProfilingInfo->ulBoardCoreTemp, 10);
-	fLKG_Factor = GetScaledFraction(getASICProfilingInfo->ulEvvLkgFactor, 100);
-	fT_FT = GetScaledFraction(getASICProfilingInfo->ulLeakageTemp, 10);
+				 le32_to_cpu(getASICProfilingInfo->ulMaxVddc), 1000), ConvertToFraction(4));
+	fT_prod = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulBoardCoreTemp), 10);
+	fLKG_Factor = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulEvvLkgFactor), 100);
+	fT_FT = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLeakageTemp), 10);
 	fV_FT = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulLeakageVoltage, 1000), ConvertToFraction(4));
+				le32_to_cpu(getASICProfilingInfo->ulLeakageVoltage), 1000), ConvertToFraction(4));
 	fV_min = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulMinVddc, 1000), ConvertToFraction(4));
+				 le32_to_cpu(getASICProfilingInfo->ulMinVddc), 1000), ConvertToFraction(4));
 
 	/*-----------------------
 	 * PART 3
@@ -925,7 +924,7 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	fA_Term = fAdd(fMargin_RO_a, fAdd(fMultiply(fSM_A4, fSclk), fSM_A5));
 	fB_Term = fAdd(fAdd(fMultiply(fSM_A2, fSclk), fSM_A6), fMargin_RO_b);
 	fC_Term = fAdd(fMargin_RO_c,
-			fAdd(fMultiply(fSM_A0,fLkg_FT),
+			fAdd(fMultiply(fSM_A0, fLkg_FT),
 			fAdd(fMultiply(fSM_A1, fMultiply(fLkg_FT, fSclk)),
 			fAdd(fMultiply(fSM_A3, fSclk),
 			fSubtract(fSM_A7, fRO_fused)))));
@@ -1063,9 +1062,9 @@ int atomctrl_get_voltage_evv_on_sclk(
 	get_voltage_info_param_space.ucVoltageMode   =
 		ATOM_GET_VOLTAGE_EVV_VOLTAGE;
 	get_voltage_info_param_space.usVoltageLevel  =
-		virtual_voltage_Id;
+		cpu_to_le16(virtual_voltage_Id);
 	get_voltage_info_param_space.ulSCLKFreq      =
-		sclk;
+		cpu_to_le32(sclk);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
@@ -1074,8 +1073,54 @@ int atomctrl_get_voltage_evv_on_sclk(
 	if (0 != result)
 		return result;
 
-	*voltage = ((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
-			(&get_voltage_info_param_space))->usVoltageLevel;
+	*voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+				(&get_voltage_info_param_space))->usVoltageLevel);
+
+	return result;
+}
+
+/**
+ * atomctrl_get_voltage_evv gets voltage via call to ATOM COMMAND table.
+ * @param hwmgr	input: pointer to hwManager
+ * @param virtual_voltage_id      input: voltage id which match per voltage DPM state: 0xff01, 0xff02.. 0xff08
+ * @param voltage		       output: real voltage level in unit of mv
+ */
+int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr,
+			     uint16_t virtual_voltage_id,
+			     uint16_t *voltage)
+{
+	int result;
+	int entry_id;
+	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
+
+	/* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */
+	for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) {
+		if (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].v == virtual_voltage_id) {
+			/* found */
+			break;
+		}
+	}
+
+	PP_ASSERT_WITH_CODE(entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count,
+	        "Can't find requested voltage id in vddc_dependency_on_sclk table!",
+	        return -EINVAL;
+	);
+
+	get_voltage_info_param_space.ucVoltageType = VOLTAGE_TYPE_VDDC;
+	get_voltage_info_param_space.ucVoltageMode = ATOM_GET_VOLTAGE_EVV_VOLTAGE;
+	get_voltage_info_param_space.usVoltageLevel = virtual_voltage_id;
+	get_voltage_info_param_space.ulSCLKFreq =
+		cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk);
+
+	result = cgs_atom_exec_cmd_table(hwmgr->device,
+			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
+			&get_voltage_info_param_space);
+
+	if (0 != result)
+		return result;
+
+	*voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+				(&get_voltage_info_param_space))->usVoltageLevel);
 
 	return result;
 }
@@ -1165,8 +1210,8 @@ static int asic_internal_ss_get_ss_asignment(struct pp_hwmgr *hwmgr,
 
 	if (entry_found) {
 		ssEntry->speed_spectrum_percentage =
-			ssInfo->usSpreadSpectrumPercentage;
-		ssEntry->speed_spectrum_rate = ssInfo->usSpreadRateInKhz;
+			le16_to_cpu(ssInfo->usSpreadSpectrumPercentage);
+		ssEntry->speed_spectrum_rate = le16_to_cpu(ssInfo->usSpreadRateInKhz);
 
 		if (((GET_DATA_TABLE_MAJOR_REVISION(table) == 2) &&
 			(GET_DATA_TABLE_MINOR_REVISION(table) >= 2)) ||
@@ -1222,7 +1267,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
 	int result;
 	READ_EFUSE_VALUE_PARAMETER efuse_param;
 
-	efuse_param.sEfuse.usEfuseIndex = (start_index / 32) * 4;
+	efuse_param.sEfuse.usEfuseIndex = cpu_to_le16((start_index / 32) * 4);
 	efuse_param.sEfuse.ucBitShift = (uint8_t)
 			(start_index - ((start_index / 32) * 32));
 	efuse_param.sEfuse.ucBitLength  = (uint8_t)
@@ -1232,19 +1277,21 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
 			&efuse_param);
 	if (!result)
-		*efuse = efuse_param.ulEfuseValue & mask;
+		*efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask;
 
 	return result;
 }
 
 int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
-								uint8_t level)
+			      uint8_t level)
 {
 	DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters;
 	int result;
 
-	memory_clock_parameters.asDPMMCReg.ulClock.ulClockFreq = memory_clock & SET_CLOCK_FREQ_MASK;
-	memory_clock_parameters.asDPMMCReg.ulClock.ulComputeClockFlag = ADJUST_MC_SETTING_PARAM;
+	memory_clock_parameters.asDPMMCReg.ulClock.ulClockFreq =
+		memory_clock & SET_CLOCK_FREQ_MASK;
+	memory_clock_parameters.asDPMMCReg.ulClock.ulComputeClockFlag =
+		ADJUST_MC_SETTING_PARAM;
 	memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level;
 
 	result = cgs_atom_exec_cmd_table
@@ -1256,7 +1303,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 }
 
 int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage)
+				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
 {
 
 	int result;
@@ -1264,8 +1311,8 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
 
 	get_voltage_info_param_space.ucVoltageType = voltage_type;
 	get_voltage_info_param_space.ucVoltageMode = ATOM_GET_VOLTAGE_EVV_VOLTAGE;
-	get_voltage_info_param_space.usVoltageLevel = virtual_voltage_Id;
-	get_voltage_info_param_space.ulSCLKFreq = sclk;
+	get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id);
+	get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
@@ -1274,7 +1321,7 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
 	if (0 != result)
 		return result;
 
-	*voltage = get_voltage_info_param_space.usVoltageLevel;
+	*voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel);
 
 	return result;
 }
@@ -1295,10 +1342,57 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
 	for (i = 0; i < psmu_info->ucSclkEntryNum; i++) {
 		table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting;
 		table->entry[i].ucPostdiv = psmu_info->asSclkFcwRangeEntry[i].ucPostdiv;
-		table->entry[i].usFcw_pcc = psmu_info->asSclkFcwRangeEntry[i].ucFcw_pcc;
-		table->entry[i].usFcw_trans_upper = psmu_info->asSclkFcwRangeEntry[i].ucFcw_trans_upper;
-		table->entry[i].usRcw_trans_lower = psmu_info->asSclkFcwRangeEntry[i].ucRcw_trans_lower;
+		table->entry[i].usFcw_pcc =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucFcw_pcc);
+		table->entry[i].usFcw_trans_upper =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucFcw_trans_upper);
+		table->entry[i].usRcw_trans_lower =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucRcw_trans_lower);
 	}
 
 	return 0;
 }
+
+int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
+				  struct pp_atom_ctrl__avfs_parameters *param)
+{
+	ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL;
+
+	if (param == NULL)
+		return -EINVAL;
+
+	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
+			cgs_atom_get_data_table(hwmgr->device,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+	if (!profile)
+		return -1;
+
+	param->ulAVFS_meanNsigma_Acontant0 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant0);
+	param->ulAVFS_meanNsigma_Acontant1 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant1);
+	param->ulAVFS_meanNsigma_Acontant2 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant2);
+	param->usAVFS_meanNsigma_DC_tol_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_DC_tol_sigma);
+	param->usAVFS_meanNsigma_Platform_mean = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_mean);
+	param->usAVFS_meanNsigma_Platform_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_sigma);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a0);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a1);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a2);
+	param->ulGB_VDROOP_TABLE_CKSON_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a0);
+	param->ulGB_VDROOP_TABLE_CKSON_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a1);
+	param->ulGB_VDROOP_TABLE_CKSON_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSON_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSON_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_b);
+	param->usMaxVoltage_0_25mv = le16_to_cpu(profile->usMaxVoltage_0_25mv);
+	param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF;
+	param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON;
+	param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF;
+	param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON;
+	param->usPSM_Age_ComFactor = le16_to_cpu(profile->usPSM_Age_ComFactor);
+	param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index d24ebb566905..fc898afce002 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -250,8 +250,38 @@ struct pp_atomctrl_gpio_pin_assignment {
 };
 typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment;
 
+struct pp_atom_ctrl__avfs_parameters {
+	uint32_t  ulAVFS_meanNsigma_Acontant0;
+	uint32_t  ulAVFS_meanNsigma_Acontant1;
+	uint32_t  ulAVFS_meanNsigma_Acontant2;
+	uint16_t usAVFS_meanNsigma_DC_tol_sigma;
+	uint16_t usAVFS_meanNsigma_Platform_mean;
+	uint16_t usAVFS_meanNsigma_Platform_sigma;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a2;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSON_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_b;
+	uint16_t  usMaxVoltage_0_25mv;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSON;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSON;
+	uint16_t usPSM_Age_ComFactor;
+	uint8_t  ucEnableApplyAVFS_CKS_OFF_Voltage;
+	uint8_t  ucReserved;
+};
+
 extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment);
 extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
+extern int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, uint16_t virtual_voltage_id, uint16_t *voltage);
 extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr);
 extern int atomctrl_get_memory_clock_spread_spectrum(struct pp_hwmgr *hwmgr, const uint32_t memory_clock, pp_atomctrl_internal_ss_info *ssInfo);
 extern int atomctrl_get_engine_clock_spread_spectrum(struct pp_hwmgr *hwmgr, const uint32_t engine_clock, pp_atomctrl_internal_ss_info *ssInfo);
@@ -276,7 +306,10 @@ extern int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t
 extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 								uint8_t level);
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
+				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
+
+extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
index 009bd5963ed8..8f50a038396c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
@@ -50,55 +50,45 @@ typedef union _fInt {
  * Function Declarations
  *  -------------------------------------------------------------------------------
  */
-fInt ConvertToFraction(int);                       /* Use this to convert an INT to a FINT */
-fInt Convert_ULONG_ToFraction(uint32_t);              /* Use this to convert an uint32_t to a FINT */
-fInt GetScaledFraction(int, int);                  /* Use this to convert an INT to a FINT after scaling it by a factor */
-int ConvertBackToInteger(fInt);                    /* Convert a FINT back to an INT that is scaled by 1000 (i.e. last 3 digits are the decimal digits) */
-
-fInt fNegate(fInt);                                /* Returns -1 * input fInt value */
-fInt fAdd (fInt, fInt);                            /* Returns the sum of two fInt numbers */
-fInt fSubtract (fInt A, fInt B);                   /* Returns A-B - Sometimes easier than Adding negative numbers */
-fInt fMultiply (fInt, fInt);                       /* Returns the product of two fInt numbers */
-fInt fDivide (fInt A, fInt B);                     /* Returns A/B */
-fInt fGetSquare(fInt);                             /* Returns the square of a fInt number */
-fInt fSqrt(fInt);                                  /* Returns the Square Root of a fInt number */
-
-int uAbs(int);                                     /* Returns the Absolute value of the Int */
-fInt fAbs(fInt);                                   /* Returns the Absolute value of the fInt */
-int uPow(int base, int exponent);                  /* Returns base^exponent an INT */
-
-void SolveQuadracticEqn(fInt, fInt, fInt, fInt[]); /* Returns the 2 roots via the array */
-bool Equal(fInt, fInt);                         /* Returns true if two fInts are equal to each other */
-bool GreaterThan(fInt A, fInt B);               /* Returns true if A > B */
-
-fInt fExponential(fInt exponent);                  /* Can be used to calculate e^exponent */
-fInt fNaturalLog(fInt value);                      /* Can be used to calculate ln(value) */
+static fInt ConvertToFraction(int);                       /* Use this to convert an INT to a FINT */
+static fInt Convert_ULONG_ToFraction(uint32_t);           /* Use this to convert an uint32_t to a FINT */
+static fInt GetScaledFraction(int, int);                  /* Use this to convert an INT to a FINT after scaling it by a factor */
+static int ConvertBackToInteger(fInt);                    /* Convert a FINT back to an INT that is scaled by 1000 (i.e. last 3 digits are the decimal digits) */
+
+static fInt fNegate(fInt);                                /* Returns -1 * input fInt value */
+static fInt fAdd (fInt, fInt);                            /* Returns the sum of two fInt numbers */
+static fInt fSubtract (fInt A, fInt B);                   /* Returns A-B - Sometimes easier than Adding negative numbers */
+static fInt fMultiply (fInt, fInt);                       /* Returns the product of two fInt numbers */
+static fInt fDivide (fInt A, fInt B);                     /* Returns A/B */
+static fInt fGetSquare(fInt);                             /* Returns the square of a fInt number */
+static fInt fSqrt(fInt);                                  /* Returns the Square Root of a fInt number */
+
+static int uAbs(int);                                     /* Returns the Absolute value of the Int */
+static int uPow(int base, int exponent);                  /* Returns base^exponent an INT */
+
+static void SolveQuadracticEqn(fInt, fInt, fInt, fInt[]); /* Returns the 2 roots via the array */
+static bool Equal(fInt, fInt);                            /* Returns true if two fInts are equal to each other */
+static bool GreaterThan(fInt A, fInt B);                  /* Returns true if A > B */
+
+static fInt fExponential(fInt exponent);                  /* Can be used to calculate e^exponent */
+static fInt fNaturalLog(fInt value);                      /* Can be used to calculate ln(value) */
 
 /* Fuse decoding functions
  * -------------------------------------------------------------------------------------
  */
-fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength);
-fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength);
-fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength);
+static fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength);
+static fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength);
+static fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength);
 
 /* Internal Support Functions - Use these ONLY for testing or adding to internal functions
  * -------------------------------------------------------------------------------------
  * Some of the following functions take two INTs as their input - This is unsafe for a variety of reasons.
  */
-fInt Add (int, int);                               /* Add two INTs and return Sum as FINT */
-fInt Multiply (int, int);                          /* Multiply two INTs and return Product as FINT */
-fInt Divide (int, int);                            /* You get the idea... */
-fInt fNegate(fInt);
+static fInt Divide (int, int);                            /* Divide two INTs and return result as FINT */
+static fInt fNegate(fInt);
 
-int uGetScaledDecimal (fInt);                      /* Internal function */
-int GetReal (fInt A);                              /* Internal function */
-
-/* Future Additions and Incomplete Functions
- * -------------------------------------------------------------------------------------
- */
-int GetRoundedValue(fInt);                         /* Incomplete function - Useful only when Precision is lacking */
-                                                   /* Let us say we have 2.126 but can only handle 2 decimal points. We could */
-                                                   /* either chop of 6 and keep 2.12 or use this function to get 2.13, which is more accurate */
+static int uGetScaledDecimal (fInt);                      /* Internal function */
+static int GetReal (fInt A);                              /* Internal function */
 
 /* -------------------------------------------------------------------------------------
  * TROUBLESHOOTING INFORMATION
@@ -115,7 +105,7 @@ int GetRoundedValue(fInt);                         /* Incomplete function - Usef
  * START OF CODE
  * -------------------------------------------------------------------------------------
  */
-fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
+static fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
 {
 	uint32_t i;
 	bool bNegated = false;
@@ -154,7 +144,7 @@ fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
 	return solution;
 }
 
-fInt fNaturalLog(fInt value)
+static fInt fNaturalLog(fInt value)
 {
 	uint32_t i;
 	fInt upper_bound = Divide(8, 1000);
@@ -179,7 +169,7 @@ fInt fNaturalLog(fInt value)
 	return (fAdd(solution, error_term));
 }
 
-fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength)
+static fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength)
 {
 	fInt f_fuse_value = Convert_ULONG_ToFraction(fuse_value);
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -194,7 +184,7 @@ fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t b
 }
 
 
-fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength)
+static fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength)
 {
 	fInt f_fuse_value = Convert_ULONG_ToFraction(fuse_value);
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -212,7 +202,7 @@ fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint
 	return f_decoded_value;
 }
 
-fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength)
+static fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength)
 {
 	fInt fLeakage;
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -225,7 +215,7 @@ fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min,
 	return fLeakage;
 }
 
-fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to make fInt a private declaration? */
+static fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to make fInt a private declaration? */
 {
 	fInt temp;
 
@@ -237,13 +227,13 @@ fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to m
 	return temp;
 }
 
-fInt fNegate(fInt X)
+static fInt fNegate(fInt X)
 {
 	fInt CONSTANT_NEGONE = ConvertToFraction(-1);
 	return (fMultiply(X, CONSTANT_NEGONE));
 }
 
-fInt Convert_ULONG_ToFraction(uint32_t X)
+static fInt Convert_ULONG_ToFraction(uint32_t X)
 {
 	fInt temp;
 
@@ -255,7 +245,7 @@ fInt Convert_ULONG_ToFraction(uint32_t X)
 	return temp;
 }
 
-fInt GetScaledFraction(int X, int factor)
+static fInt GetScaledFraction(int X, int factor)
 {
 	int times_shifted, factor_shifted;
 	bool bNEGATED;
@@ -304,7 +294,7 @@ fInt GetScaledFraction(int X, int factor)
 }
 
 /* Addition using two fInts */
-fInt fAdd (fInt X, fInt Y)
+static fInt fAdd (fInt X, fInt Y)
 {
 	fInt Sum;
 
@@ -314,7 +304,7 @@ fInt fAdd (fInt X, fInt Y)
 }
 
 /* Addition using two fInts */
-fInt fSubtract (fInt X, fInt Y)
+static fInt fSubtract (fInt X, fInt Y)
 {
 	fInt Difference;
 
@@ -323,7 +313,7 @@ fInt fSubtract (fInt X, fInt Y)
 	return Difference;
 }
 
-bool Equal(fInt A, fInt B)
+static bool Equal(fInt A, fInt B)
 {
 	if (A.full == B.full)
 		return true;
@@ -331,7 +321,7 @@ bool Equal(fInt A, fInt B)
 		return false;
 }
 
-bool GreaterThan(fInt A, fInt B)
+static bool GreaterThan(fInt A, fInt B)
 {
 	if (A.full > B.full)
 		return true;
@@ -339,7 +329,7 @@ bool GreaterThan(fInt A, fInt B)
 		return false;
 }
 
-fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
+static fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
 {
 	fInt Product;
 	int64_t tempProduct;
@@ -363,7 +353,7 @@ fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
 	return Product;
 }
 
-fInt fDivide (fInt X, fInt Y)
+static fInt fDivide (fInt X, fInt Y)
 {
 	fInt fZERO, fQuotient;
 	int64_t longlongX, longlongY;
@@ -384,7 +374,7 @@ fInt fDivide (fInt X, fInt Y)
 	return fQuotient;
 }
 
-int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to check with the Golden settings table*/
+static int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to check with the Golden settings table*/
 {
 	fInt fullNumber, scaledDecimal, scaledReal;
 
@@ -397,13 +387,13 @@ int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to ch
 	return fullNumber.full;
 }
 
-fInt fGetSquare(fInt A)
+static fInt fGetSquare(fInt A)
 {
 	return fMultiply(A,A);
 }
 
 /* x_new = x_old - (x_old^2 - C) / (2 * x_old) */
-fInt fSqrt(fInt num)
+static fInt fSqrt(fInt num)
 {
 	fInt F_divide_Fprime, Fprime;
 	fInt test;
@@ -460,7 +450,7 @@ fInt fSqrt(fInt num)
 	return (x_new);
 }
 
-void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
+static void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
 {
 	fInt *pRoots = &Roots[0];
 	fInt temp, root_first, root_second;
@@ -498,52 +488,13 @@ void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
  * -----------------------------------------------------------------------------
  */
 
-/* Addition using two normal ints - Temporary - Use only for testing purposes?. */
-fInt Add (int X, int Y)
-{
-	fInt A, B, Sum;
-
-	A.full = (X << SHIFT_AMOUNT);
-	B.full = (Y << SHIFT_AMOUNT);
-
-	Sum.full = A.full + B.full;
-
-	return Sum;
-}
-
 /* Conversion Functions */
-int GetReal (fInt A)
+static int GetReal (fInt A)
 {
 	return (A.full >> SHIFT_AMOUNT);
 }
 
-/* Temporarily Disabled */
-int GetRoundedValue(fInt A) /*For now, round the 3rd decimal place */
-{
-	/* ROUNDING TEMPORARLY DISABLED
-	int temp = A.full;
-	int decimal_cutoff, decimal_mask = 0x000001FF;
-	decimal_cutoff = temp & decimal_mask;
-	if (decimal_cutoff > 0x147) {
-		temp += 673;
-	}*/
-
-	return ConvertBackToInteger(A)/10000; /*Temporary - in case this was used somewhere else */
-}
-
-fInt Multiply (int X, int Y)
-{
-	fInt A, B, Product;
-
-	A.full = X << SHIFT_AMOUNT;
-	B.full = Y << SHIFT_AMOUNT;
-
-	Product = fMultiply(A, B);
-
-	return Product;
-}
-
-fInt Divide (int X, int Y)
+static fInt Divide (int X, int Y)
 {
 	fInt A, B, Quotient;
 
@@ -555,7 +506,7 @@ fInt Divide (int X, int Y)
 	return Quotient;
 }
 
-int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole integers - Costly function */
+static int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole integers - Costly function */
 {
 	int dec[PRECISION];
 	int i, scaledDecimal = 0, tmp = A.partial.decimal;
@@ -570,7 +521,7 @@ int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole intege
 	return scaledDecimal;
 }
 
-int uPow(int base, int power)
+static int uPow(int base, int power)
 {
 	if (power == 0)
 		return 1;
@@ -578,15 +529,7 @@ int uPow(int base, int power)
 		return (base)*uPow(base, power - 1);
 }
 
-fInt fAbs(fInt A)
-{
-	if (A.partial.real < 0)
-		return (fMultiply(A, ConvertToFraction(-1)));
-	else
-		return A;
-}
-
-int uAbs(int X)
+static int uAbs(int X)
 {
 	if (X < 0)
 		return (X * -1);
@@ -594,7 +537,7 @@ int uAbs(int X)
 		return X;
 }
 
-fInt fRoundUpByStepSize(fInt A, fInt fStepSize, bool error_term)
+static fInt fRoundUpByStepSize(fInt A, fInt fStepSize, bool error_term)
 {
 	fInt solution;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 2f1a14fe05b1..6c321b0d8a1e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -794,19 +794,35 @@ static const ATOM_PPLIB_STATE_V2 *get_state_entry_v2(
 static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table(
 				     struct pp_hwmgr *hwmgr)
 {
-	const void *table_addr = NULL;
+	const void *table_addr = hwmgr->soft_pp_table;
 	uint8_t frev, crev;
 	uint16_t size;
 
-	table_addr = cgs_atom_get_data_table(hwmgr->device,
-			GetIndexIntoMasterTable(DATA, PowerPlayInfo),
-			&size, &frev, &crev);
+	if (!table_addr) {
+		table_addr = cgs_atom_get_data_table(hwmgr->device,
+				GetIndexIntoMasterTable(DATA, PowerPlayInfo),
+				&size, &frev, &crev);
 
-	hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table_size = size;
+	}
 
 	return (const ATOM_PPLIB_POWERPLAYTABLE *)table_addr;
 }
 
+int pp_tables_get_response_times(struct pp_hwmgr *hwmgr,
+				uint32_t *vol_rep_time, uint32_t *bb_rep_time)
+{
+	const ATOM_PPLIB_POWERPLAYTABLE *powerplay_tab = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE(NULL != powerplay_tab,
+			    "Missing PowerPlay Table!", return -EINVAL);
+
+	*vol_rep_time = (uint32_t)le16_to_cpu(powerplay_tab->usVoltageTime);
+	*bb_rep_time = (uint32_t)le16_to_cpu(powerplay_tab->usBackbiasTime);
+
+	return 0;
+}
 
 int pp_tables_get_num_of_entries(struct pp_hwmgr *hwmgr,
 				     unsigned long *num_of_entries)
@@ -1499,7 +1515,7 @@ int get_number_of_vce_state_table_entries(
 	const ATOM_PPLIB_VCE_State_Table *vce_table =
 				    get_vce_state_table(hwmgr, table);
 
-	if (vce_table > 0)
+	if (vce_table)
 		return vce_table->numEntries;
 
 	return 0;
@@ -1589,11 +1605,6 @@ static int pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 static int pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
 	if (NULL != hwmgr->dyn_state.vddc_dependency_on_sclk) {
 		kfree(hwmgr->dyn_state.vddc_dependency_on_sclk);
 		hwmgr->dyn_state.vddc_dependency_on_sclk = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
index 30434802417e..baddaa75693b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
@@ -32,16 +32,19 @@ struct pp_hw_power_state;
 extern const struct pp_table_func pptable_funcs;
 
 typedef int (*pp_tables_hw_clock_info_callback)(struct pp_hwmgr *hwmgr,
-					struct pp_hw_power_state *hw_ps,
-							unsigned int index,
-						 const void *clock_info);
+						struct pp_hw_power_state *hw_ps,
+						unsigned int index,
+						const void *clock_info);
 
 int pp_tables_get_num_of_entries(struct pp_hwmgr *hwmgr,
-				     unsigned long *num_of_entries);
+				 unsigned long *num_of_entries);
 
 int pp_tables_get_entry(struct pp_hwmgr *hwmgr,
-						unsigned long entry_index,
-						struct pp_power_state *ps,
-				pp_tables_hw_clock_info_callback func);
+			unsigned long entry_index,
+			struct pp_power_state *ps,
+			pp_tables_hw_clock_info_callback func);
+
+int pp_tables_get_response_times(struct pp_hwmgr *hwmgr,
+				 uint32_t *vol_rep_time, uint32_t *bb_rep_time);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 16fed487973b..c7dc111221c2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -571,7 +571,7 @@ int tonga_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->sclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable SCLK DPM when DPM is disabled",
 				return -1
 				);
@@ -587,7 +587,7 @@ int tonga_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->mclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable MCLK DPM when DPM is disabled",
 				return -1
 				);
@@ -614,7 +614,7 @@ int tonga_stop_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->pcie_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable PCIE DPM when DPM is disabled",
 				return -1
 				);
@@ -630,7 +630,7 @@ int tonga_stop_dpm(struct pp_hwmgr *hwmgr)
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 	PP_ASSERT_WITH_CODE(
-			(0 == tonga_is_dpm_running(hwmgr)),
+			!tonga_is_dpm_running(hwmgr),
 			"Trying to Disable Voltage CNTL when DPM is disabled",
 			return -1
 			);
@@ -688,8 +688,9 @@ int tonga_dpm_force_state(struct pp_hwmgr *hwmgr, uint32_t n)
 	uint32_t level_mask = 1 << n;
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to force SCLK when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to force SCLK when DPM is disabled",
+			    return -1;);
 	if (0 == data->sclk_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 							     hwmgr->smumgr,
@@ -712,8 +713,9 @@ int tonga_dpm_force_state_mclk(struct pp_hwmgr *hwmgr, uint32_t n)
 	uint32_t level_mask = 1 << n;
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Force MCLK when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to Force MCLK when DPM is disabled",
+			    return -1;);
 	if (0 == data->mclk_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 								hwmgr->smumgr,
@@ -735,8 +737,9 @@ int tonga_dpm_force_state_pcie(struct pp_hwmgr *hwmgr, uint32_t n)
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Force PCIE level when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to Force PCIE level when DPM is disabled",
+			    return -1;);
 	if (0 == data->pcie_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 							     hwmgr->smumgr,
@@ -774,7 +777,7 @@ int tonga_process_firmware_header(struct pp_hwmgr *hwmgr)
 
 	uint32_t tmp;
 	int result;
-	bool error = 0;
+	bool error = false;
 
 	result = tonga_read_smc_sram_dword(hwmgr->smumgr,
 				SMU72_FIRMWARE_HEADER_LOCATION +
@@ -933,11 +936,11 @@ int tonga_init_power_gate_state(struct pp_hwmgr *hwmgr)
 {
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
-	data->uvd_power_gated = 0;
-	data->vce_power_gated = 0;
-	data->samu_power_gated = 0;
-	data->acp_power_gated = 0;
-	data->pg_acp_init = 1;
+	data->uvd_power_gated = false;
+	data->vce_power_gated = false;
+	data->samu_power_gated = false;
+	data->acp_power_gated = false;
+	data->pg_acp_init = true;
 
 	return 0;
 }
@@ -955,7 +958,7 @@ int tonga_check_for_dpm_running(struct pp_hwmgr *hwmgr)
 	 * because we may have test scenarios that need us intentionly disable SCLK/MCLK DPM,
 	 * whereas voltage control is a fundemental change that will not be disabled
 	 */
-	return (0 == tonga_is_dpm_running(hwmgr) ? 0 : 1);
+	return (!tonga_is_dpm_running(hwmgr) ? 0 : 1);
 }
 
 /**
@@ -968,7 +971,7 @@ int tonga_check_for_dpm_stopped(struct pp_hwmgr *hwmgr)
 {
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
-	if (0 != tonga_is_dpm_running(hwmgr)) {
+	if (tonga_is_dpm_running(hwmgr)) {
 		/* If HW Virtualization is enabled, dpm_table_start will not have a valid value */
 		if (!data->dpm_table_start) {
 			return 1;
@@ -991,7 +994,7 @@ static int tonga_trim_voltage_table(struct pp_hwmgr *hwmgr,
 {
 	uint32_t table_size, i, j;
 	uint16_t vvalue;
-	bool bVoltageFound = 0;
+	bool bVoltageFound = false;
 	pp_atomctrl_voltage_table *table;
 
 	PP_ASSERT_WITH_CODE((NULL != voltage_table), "Voltage Table empty.", return -1;);
@@ -1007,11 +1010,11 @@ static int tonga_trim_voltage_table(struct pp_hwmgr *hwmgr,
 
 	for (i = 0; i < voltage_table->count; i++) {
 		vvalue = voltage_table->entries[i].value;
-		bVoltageFound = 0;
+		bVoltageFound = false;
 
 		for (j = 0; j < table->count; j++) {
 			if (vvalue == table->entries[j].value) {
-				bVoltageFound = 1;
+				bVoltageFound = true;
 				break;
 			}
 		}
@@ -1302,7 +1305,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
 			table->Smio[count] |=
 				data->mvdd_voltage_table.entries[count].smio_low;
 		}
-		table->SmioMask2 = data->vddci_voltage_table.mask_low;
+		table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
 		CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount);
 	}
@@ -1331,7 +1334,6 @@ static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr,
 {
 	uint32_t count;
 	uint8_t index;
-	int result = 0;
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_voltage_lookup_table *vddgfx_lookup_table = pptable_info->vddgfx_lookup_table;
@@ -1378,7 +1380,7 @@ static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr,
 		}
 	}
 
-	return result;
+	return 0;
 }
 
 
@@ -2042,7 +2044,7 @@ static int tonga_populate_single_memory_level(
 
 	if ((data->mclk_stutter_mode_threshold != 0) &&
 	    (memory_clock <= data->mclk_stutter_mode_threshold) &&
-	    (data->is_uvd_enabled == 0)
+	    (!data->is_uvd_enabled)
 	    && (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, STUTTER_ENABLE) & 0x1)
 	    && (data->display_timing.num_existing_displays <= 2)
 	    && (data->display_timing.num_existing_displays != 0))
@@ -2705,7 +2707,7 @@ static int tonga_reset_single_dpm_table(
 
 	dpm_table->count = count;
 	for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) {
-		dpm_table->dpm_levels[i].enabled = 0;
+		dpm_table->dpm_levels[i].enabled = false;
 	}
 
 	return 0;
@@ -2718,7 +2720,7 @@ static void tonga_setup_pcie_table_entry(
 {
 	dpm_table->dpm_levels[index].value = pcie_gen;
 	dpm_table->dpm_levels[index].param1 = pcie_lanes;
-	dpm_table->dpm_levels[index].enabled = 1;
+	dpm_table->dpm_levels[index].enabled = true;
 }
 
 static int tonga_setup_default_pcie_tables(struct pp_hwmgr *hwmgr)
@@ -2828,7 +2830,7 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 				allowed_vdd_sclk_table->entries[i].clk) {
 			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].value =
 				allowed_vdd_sclk_table->entries[i].clk;
-			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; to do */
+			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].enabled = true; /*(i==0) ? 1 : 0; to do */
 			data->dpm_table.sclk_table.count++;
 		}
 	}
@@ -2842,32 +2844,11 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 			allowed_vdd_mclk_table->entries[i].clk) {
 			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].value =
 				allowed_vdd_mclk_table->entries[i].clk;
-			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; */
+			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].enabled = true; /*(i==0) ? 1 : 0; */
 			data->dpm_table.mclk_table.count++;
 		}
 	}
 
-	/* Initialize Vddc DPM table based on allow Vddc values.  And populate corresponding std values. */
-	for (i = 0; i < allowed_vdd_sclk_table->count; i++) {
-		data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddc;
-		/* tonga_hwmgr->dpm_table.VddcTable.dpm_levels[i].param1 = stdVoltageTable->entries[i].Leakage; */
-		/* param1 is for corresponding std voltage */
-		data->dpm_table.vddc_table.dpm_levels[i].enabled = 1;
-	}
-	data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count;
-
-	if (NULL != allowed_vdd_mclk_table) {
-		/* Initialize Vddci DPM table based on allow Mclk values */
-		for (i = 0; i < allowed_vdd_mclk_table->count; i++) {
-			data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddci;
-			data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1;
-			data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].mvdd;
-			data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1;
-		}
-		data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count;
-		data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count;
-	}
-
 	/* setup PCIE gen speed levels*/
 	tonga_setup_default_pcie_tables(hwmgr);
 
@@ -3047,8 +3028,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			VDDC_VRHOT_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_RegulatorHot);
@@ -3061,8 +3042,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	/* ACDC Switch GPIO */
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_AutomaticDCTransition);
@@ -3084,8 +3065,7 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	}
 
 	reg_value = 0;
-	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
+	if ((0 == reg_value) && (atomctrl_get_pp_assign_pin(hwmgr,
 			THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin_assignment))) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_ThermalOutGPIO);
@@ -3156,7 +3136,7 @@ int tonga_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr)
 
 	if (0 == data->sclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-		if (0 != tonga_is_dpm_running(hwmgr))
+		if (tonga_is_dpm_running(hwmgr))
 			printk(KERN_ERR "[ powerplay ] Trying to set Enable Mask when DPM is disabled \n");
 
 		if (0 != data->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -3171,7 +3151,7 @@ int tonga_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr)
 
 	if (0 == data->mclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-		if (0 != tonga_is_dpm_running(hwmgr))
+		if (tonga_is_dpm_running(hwmgr))
 			printk(KERN_ERR "[ powerplay ] Trying to set Enable Mask when DPM is disabled \n");
 
 		if (0 != data->dpm_level_enable_mask.mclk_dpm_enable_mask) {
@@ -3282,7 +3262,7 @@ int tonga_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwm
 
 	/* initialize vddc_dep_on_dal_pwrl table */
 	table_size = sizeof(uint32_t) + 4 * sizeof(struct phm_clock_voltage_dependency_record);
-	table_clk_vlt = (struct phm_clock_voltage_dependency_table *)kzalloc(table_size, GFP_KERNEL);
+	table_clk_vlt = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table_clk_vlt) {
 		printk(KERN_ERR "[ powerplay ] Can not allocate space for vddc_dep_on_dal_pwrl! \n");
@@ -3357,9 +3337,9 @@ int tonga_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 	int result = 1;
 
-	PP_ASSERT_WITH_CODE (0 == tonga_is_dpm_running(hwmgr),
-		"Trying to Unforce DPM when DPM is disabled. Returning without sending SMC message.",
-							return result);
+	PP_ASSERT_WITH_CODE (!tonga_is_dpm_running(hwmgr),
+			     "Trying to Unforce DPM when DPM is disabled. Returning without sending SMC message.",
+			     return result);
 
 	if (0 == data->pcie_dpm_key_disabled) {
 		PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc(
@@ -3763,7 +3743,7 @@ uint8_t tonga_get_memory_modile_index(struct pp_hwmgr *hwmgr)
 
 bool tonga_check_s0_mc_reg_index(uint16_t inReg, uint16_t *outReg)
 {
-	bool result = 1;
+	bool result = true;
 
 	switch (inReg) {
 	case  mmMC_SEQ_RAS_TIMING:
@@ -3847,7 +3827,7 @@ bool tonga_check_s0_mc_reg_index(uint16_t inReg, uint16_t *outReg)
 		break;
 
 	default:
-		result = 0;
+		result = false;
 		break;
 	}
 
@@ -4443,13 +4423,6 @@ int tonga_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int tonga_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -4463,7 +4436,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
 	SMU72_Discrete_DpmTable  *table = NULL;
-	tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	tonga_hwmgr *data;
 	pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	phw_tonga_ulv_parm *ulv;
@@ -4472,7 +4445,13 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((NULL != hwmgr),
 		"Invalid Parameter!", return -1;);
 
-	data->dll_defaule_on = 0;
+	data = kzalloc(sizeof(struct tonga_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
+	data->dll_defaule_on = false;
 	data->sram_end = SMC_RAM_END;
 
 	data->activity_target[0] = PPTONGA_TARGETACTIVITY_DFLT;
@@ -4510,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 				VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) {
@@ -4577,13 +4557,13 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	/* ULV Support*/
 	ulv = &(data->ulv);
-	ulv->ulv_supported = 0;
+	ulv->ulv_supported = false;
 
 	/* Initalize Dynamic State Adjustment Rule Settings*/
 	result = tonga_initializa_dynamic_state_adjustment_rule_settings(hwmgr);
 	if (result)
 		printk(KERN_ERR "[ powerplay ] tonga_initializa_dynamic_state_adjustment_rule_settings failed!\n");
-	data->uvd_enabled = 0;
+	data->uvd_enabled = false;
 
 	table = &(data->smc_state_table);
 
@@ -4591,7 +4571,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	* if ucGPIO_ID=VDDC_PCC_GPIO_PINID in GPIO_LUTable,
 	* Peak Current Control feature is enabled and we should program PCC HW register
 	*/
-	if (0 == atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
+	if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
 		uint32_t temp_reg = cgs_read_ind_register(hwmgr->device,
 										CGS_IND_REG__SMC, ixCNB_PWRMGT_CNTL);
 
@@ -4630,7 +4610,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 		PHM_PlatformCaps_SMU7);
 
-	data->vddc_phase_shed_control = 0;
+	data->vddc_phase_shed_control = false;
 
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 		      PHM_PlatformCaps_UVDPowerGating);
@@ -4649,7 +4629,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	}
 
 	if (0 == result) {
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 			TONGA_MAX_HARDWARE_POWERLEVELS;
 		hwmgr->platform_descriptor.hardwarePerformanceLevels = 2;
@@ -4659,7 +4639,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -4668,7 +4648,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -5330,9 +5310,8 @@ static int tonga_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(
-			0 == tonga_is_dpm_running(hwmgr),
-			"Trying to freeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5344,8 +5323,8 @@ static int tonga_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to freeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5480,7 +5459,6 @@ static	int tonga_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 
 static int tonga_trim_dpm_states(struct pp_hwmgr *hwmgr, const struct tonga_power_state *hw_state)
 {
-	int result = 0;
 	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -5500,7 +5478,7 @@ static int tonga_trim_dpm_states(struct pp_hwmgr *hwmgr, const struct tonga_powe
 						hw_state->performance_levels[0].memory_clock,
 						hw_state->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int tonga_generate_dpm_level_enable_mask(struct pp_hwmgr *hwmgr, const void *input)
@@ -5647,8 +5625,8 @@ static int tonga_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Unfreeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			 0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5660,9 +5638,8 @@ static int tonga_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(
-				0 == tonga_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(
 			 0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -6051,42 +6028,6 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -6194,11 +6135,96 @@ static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
 	return size;
 }
 
+static int tonga_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct tonga_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int tonga_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct tonga_power_state  *tonga_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	tonga_ps = cast_phw_tonga_power_state(&ps->hardware);
+
+	tonga_ps->performance_levels[tonga_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int tonga_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct tonga_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int tonga_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct tonga_power_state  *tonga_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	tonga_ps = cast_phw_tonga_power_state(&ps->hardware);
+
+	tonga_ps->performance_levels[tonga_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.backend_init = &tonga_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
 	.asic_setup = &tonga_setup_asic_task,
 	.dynamic_state_management_enable = &tonga_enable_dpm_tasks,
+	.dynamic_state_management_disable = &tonga_disable_dpm_tasks,
 	.apply_state_adjust_rules = tonga_apply_state_adjust_rules,
 	.force_dpm_level = &tonga_force_dpm_level,
 	.power_state_set = tonga_set_power_state_tasks,
@@ -6232,22 +6258,16 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.check_states_equal = tonga_check_states_equal,
 	.set_fan_control_mode = tonga_set_fan_control_mode,
 	.get_fan_control_mode = tonga_get_fan_control_mode,
-	.get_pp_table = tonga_get_pp_table,
-	.set_pp_table = tonga_set_pp_table,
 	.force_clock_level = tonga_force_clock_level,
 	.print_clock_levels = tonga_print_clock_levels,
+	.get_sclk_od = tonga_get_sclk_od,
+	.set_sclk_od = tonga_set_sclk_od,
+	.get_mclk_od = tonga_get_mclk_od,
+	.set_mclk_od = tonga_set_mclk_od,
 };
 
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	tonga_hwmgr  *data;
-
-	data = kzalloc (sizeof(tonga_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-	memset(data, 0x00, sizeof(tonga_hwmgr));
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &tonga_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_tonga_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 573cd39fe78d..3961884bfa9b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -352,9 +352,6 @@ struct tonga_hwmgr {
 	bool                           samu_power_gated; /* 1: gated, 0:not gated */
 	bool                           acp_power_gated;  /* 1: gated, 0:not gated */
 	bool                           pg_acp_init;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 typedef struct tonga_hwmgr tonga_hwmgr;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
index 1b44f4e9b8f5..f127198aafc4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
@@ -197,6 +197,22 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Table {
 	ATOM_Tonga_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
 } ATOM_Tonga_SCLK_Dependency_Table;
 
+typedef struct _ATOM_Polaris_SCLK_Dependency_Record {
+	UCHAR  ucVddInd;											/* Base voltage */
+	USHORT usVddcOffset;										/* Offset relative to base voltage */
+	ULONG ulSclk;
+	USHORT usEdcCurrent;
+	UCHAR  ucReliabilityTemperature;
+	UCHAR  ucCKSVOffsetandDisable;			/* Bits 0~6: Voltage offset for CKS, Bit 7: Disable/enable for the SCLK level. */
+	ULONG  ulSclkOffset;
+} ATOM_Polaris_SCLK_Dependency_Record;
+
+typedef struct _ATOM_Polaris_SCLK_Dependency_Table {
+	UCHAR ucRevId;
+	UCHAR ucNumEntries;							/* Number of entries. */
+	ATOM_Polaris_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
+} ATOM_Polaris_SCLK_Dependency_Table;
+
 typedef struct _ATOM_Tonga_PCIE_Record {
 	UCHAR ucPCIEGenSpeed;
 	UCHAR usPCIELaneWidth;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 10e3630ee39d..cfb647f76cbe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -167,8 +167,7 @@ static int get_vddc_lookup_table(
 	table_size = sizeof(uint32_t) +
 		sizeof(phm_ppt_v1_voltage_lookup_record) * max_levels;
 
-	table = (phm_ppt_v1_voltage_lookup_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table)
 		return -ENOMEM;
@@ -302,7 +301,7 @@ static int init_dpm_2_parameters(
 			(((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset));
 
 		if (0 != powerplay_table->usPPMTableOffset) {
-			if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) {
+			if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) {
 				phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_EnablePlatformPowerManagement);
 			}
@@ -327,7 +326,7 @@ static int get_valid_clk(
 	table_size = sizeof(uint32_t) +
 		sizeof(uint32_t) * clk_volt_pp_table->count;
 
-	table = (struct phm_clock_array *)kzalloc(table_size, GFP_KERNEL);
+	table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table)
 		return -ENOMEM;
@@ -377,8 +376,7 @@ static int get_mclk_voltage_dependency_table(
 	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
 		* mclk_dep_table->ucNumEntries;
 
-	mclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	mclk_table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == mclk_table)
 		return -ENOMEM;
@@ -408,41 +406,76 @@ static int get_mclk_voltage_dependency_table(
 static int get_sclk_voltage_dependency_table(
 		struct pp_hwmgr *hwmgr,
 		phm_ppt_v1_clock_voltage_dependency_table **pp_tonga_sclk_dep_table,
-		const ATOM_Tonga_SCLK_Dependency_Table * sclk_dep_table
+		const PPTable_Generic_SubTable_Header *sclk_dep_table
 		)
 {
 	uint32_t table_size, i;
 	phm_ppt_v1_clock_voltage_dependency_table *sclk_table;
 
-	PP_ASSERT_WITH_CODE((0 != sclk_dep_table->ucNumEntries),
-		"Invalid PowerPlay Table!", return -1);
+	if (sclk_dep_table->ucRevId < 1) {
+		const ATOM_Tonga_SCLK_Dependency_Table *tonga_table =
+			    (ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table;
 
-	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
-		* sclk_dep_table->ucNumEntries;
+		PP_ASSERT_WITH_CODE((0 != tonga_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
 
-	sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* tonga_table->ucNumEntries;
 
-	if (NULL == sclk_table)
-		return -ENOMEM;
+		sclk_table = kzalloc(table_size, GFP_KERNEL);
 
-	memset(sclk_table, 0x00, table_size);
-
-	sclk_table->count = (uint32_t)sclk_dep_table->ucNumEntries;
-
-	for (i = 0; i < sclk_dep_table->ucNumEntries; i++) {
-		sclk_table->entries[i].vddInd =
-			sclk_dep_table->entries[i].ucVddInd;
-		sclk_table->entries[i].vdd_offset =
-			sclk_dep_table->entries[i].usVddcOffset;
-		sclk_table->entries[i].clk =
-			sclk_dep_table->entries[i].ulSclk;
-		sclk_table->entries[i].cks_enable =
-			(((sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
-		sclk_table->entries[i].cks_voffset =
-			(sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
-	}
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)tonga_table->ucNumEntries;
+
+		for (i = 0; i < tonga_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				tonga_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				tonga_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				tonga_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((tonga_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(tonga_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+		}
+	} else {
+		const ATOM_Polaris_SCLK_Dependency_Table *polaris_table =
+			    (ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table;
+
+		PP_ASSERT_WITH_CODE((0 != polaris_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
+
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* polaris_table->ucNumEntries;
 
+		sclk_table = kzalloc(table_size, GFP_KERNEL);
+
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)polaris_table->ucNumEntries;
+
+		for (i = 0; i < polaris_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				polaris_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				polaris_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				polaris_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((polaris_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(polaris_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+			sclk_table->entries[i].sclk_offset = polaris_table->entries[i].ulSclkOffset;
+		}
+	}
 	*pp_tonga_sclk_dep_table = sclk_table;
 
 	return 0;
@@ -467,7 +500,7 @@ static int get_pcie_table(
 		table_size = sizeof(uint32_t) +
 			sizeof(phm_ppt_v1_pcie_record) * atom_pcie_table->ucNumEntries;
 
-		pcie_table = (phm_ppt_v1_pcie_table *)kzalloc(table_size, GFP_KERNEL);
+		pcie_table = kzalloc(table_size, GFP_KERNEL);
 
 		if (pcie_table == NULL)
 			return -ENOMEM;
@@ -504,7 +537,7 @@ static int get_pcie_table(
 		table_size = sizeof(uint32_t) +
 			sizeof(phm_ppt_v1_pcie_record) * atom_pcie_table->ucNumEntries;
 
-		pcie_table = (phm_ppt_v1_pcie_table *)kzalloc(table_size, GFP_KERNEL);
+		pcie_table = kzalloc(table_size, GFP_KERNEL);
 
 		if (pcie_table == NULL)
 			return -ENOMEM;
@@ -658,8 +691,7 @@ static int get_mm_clock_voltage_table(
 	table_size = sizeof(uint32_t) +
 		sizeof(phm_ppt_v1_mm_clock_voltage_dependency_record)
 		* mm_dependency_table->ucNumEntries;
-	mm_table = (phm_ppt_v1_mm_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	mm_table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == mm_table)
 		return -ENOMEM;
@@ -708,8 +740,8 @@ static int init_clock_voltage_dependency(
 	const ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 		(const ATOM_Tonga_MCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usMclkDependencyTableOffset));
-	const ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-		(const ATOM_Tonga_SCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
+	const PPTable_Generic_SubTable_Header *sclk_dep_table =
+		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
 	const ATOM_Tonga_Hard_Limit_Table *pHardLimits =
 		(const ATOM_Tonga_Hard_Limit_Table *)(((unsigned long) powerplay_table) +
@@ -1036,54 +1068,46 @@ int tonga_pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 int tonga_pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
-	int result = 0;
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
-	if (NULL != pp_table_information->vdd_dep_on_sclk)
-		pp_table_information->vdd_dep_on_sclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_sclk);
+	pp_table_information->vdd_dep_on_sclk = NULL;
 
-	if (NULL != pp_table_information->vdd_dep_on_mclk)
-		pp_table_information->vdd_dep_on_mclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_mclk);
+	pp_table_information->vdd_dep_on_mclk = NULL;
 
-	if (NULL != pp_table_information->valid_mclk_values)
-		pp_table_information->valid_mclk_values = NULL;
+	kfree(pp_table_information->valid_mclk_values);
+	pp_table_information->valid_mclk_values = NULL;
 
-	if (NULL != pp_table_information->valid_sclk_values)
-		pp_table_information->valid_sclk_values = NULL;
+	kfree(pp_table_information->valid_sclk_values);
+	pp_table_information->valid_sclk_values = NULL;
 
-	if (NULL != pp_table_information->vddc_lookup_table)
-		pp_table_information->vddc_lookup_table = NULL;
+	kfree(pp_table_information->vddc_lookup_table);
+	pp_table_information->vddc_lookup_table = NULL;
 
-	if (NULL != pp_table_information->vddgfx_lookup_table)
-		pp_table_information->vddgfx_lookup_table = NULL;
+	kfree(pp_table_information->vddgfx_lookup_table);
+	pp_table_information->vddgfx_lookup_table = NULL;
 
-	if (NULL != pp_table_information->mm_dep_table)
-		pp_table_information->mm_dep_table = NULL;
+	kfree(pp_table_information->mm_dep_table);
+	pp_table_information->mm_dep_table = NULL;
 
-	if (NULL != pp_table_information->cac_dtp_table)
-		pp_table_information->cac_dtp_table = NULL;
+	kfree(pp_table_information->cac_dtp_table);
+	pp_table_information->cac_dtp_table = NULL;
 
-	if (NULL != hwmgr->dyn_state.cac_dtp_table)
-		hwmgr->dyn_state.cac_dtp_table = NULL;
+	kfree(hwmgr->dyn_state.cac_dtp_table);
+	hwmgr->dyn_state.cac_dtp_table = NULL;
 
-	if (NULL != pp_table_information->ppm_parameter_table)
-		pp_table_information->ppm_parameter_table = NULL;
+	kfree(pp_table_information->ppm_parameter_table);
+	pp_table_information->ppm_parameter_table = NULL;
 
-	if (NULL != pp_table_information->pcie_table)
-		pp_table_information->pcie_table = NULL;
+	kfree(pp_table_information->pcie_table);
+	pp_table_information->pcie_table = NULL;
 
-	if (NULL != hwmgr->pptable) {
-		kfree(hwmgr->pptable);
-		hwmgr->pptable = NULL;
-	}
+	kfree(hwmgr->pptable);
+	hwmgr->pptable = NULL;
 
-	return result;
+	return 0;
 }
 
 const struct pp_table_func tonga_pptable_funcs = {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index 50b367d44307..b764c8c05ec8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -132,6 +132,7 @@ struct amd_pp_init {
 	uint32_t chip_family;
 	uint32_t chip_id;
 	uint32_t rev_id;
+	bool powercontainment_enabled;
 };
 enum amd_pp_display_config_type{
 	AMD_PP_DisplayConfigType_None = 0,
@@ -342,6 +343,10 @@ struct amd_powerplay_funcs {
 	int (*set_pp_table)(void *handle, const char *buf, size_t size);
 	int (*force_clock_level)(void *handle, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(void *handle);
+	int (*set_sclk_od)(void *handle, uint32_t value);
+	int (*get_mclk_od)(void *handle);
+	int (*set_mclk_od)(void *handle, uint32_t value);
 };
 
 struct amd_powerplay {
@@ -355,6 +360,8 @@ int amd_powerplay_init(struct amd_pp_init *pp_init,
 
 int amd_powerplay_fini(void *handle);
 
+int amd_powerplay_reset(void *handle);
+
 int amd_powerplay_display_configuration_change(void *handle,
 		const struct amd_pp_display_configuration *input);
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 56f712c7d07a..962cb5385951 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -340,6 +340,7 @@ extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
 extern int phm_powerdown_uvd(struct pp_hwmgr *hwmgr);
 extern int phm_setup_asic(struct pp_hwmgr *hwmgr);
 extern int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr);
+extern int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr);
 extern void phm_init_dynamic_caps(struct pp_hwmgr *hwmgr);
 extern bool phm_is_hw_access_blocked(struct pp_hwmgr *hwmgr);
 extern int phm_block_hw_access(struct pp_hwmgr *hwmgr, bool block);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 28f571449495..bf0d2accf7bf 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -278,6 +278,8 @@ struct pp_hwmgr_func {
 
 	int (*dynamic_state_management_enable)(
 						struct pp_hwmgr *hw_mgr);
+	int (*dynamic_state_management_disable)(
+						struct pp_hwmgr *hw_mgr);
 
 	int (*patch_boot_state)(struct pp_hwmgr *hwmgr,
 				     struct pp_hw_power_state *hw_ps);
@@ -333,11 +335,13 @@ struct pp_hwmgr_func {
 	int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
 	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
-	int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
-	int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
 	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
 	int (*enable_per_cu_power_gating)(struct pp_hwmgr *hwmgr, bool enable);
+	int (*get_sclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_sclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
+	int (*get_mclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
 };
 
 struct pp_table_func {
@@ -411,6 +415,8 @@ struct phm_cac_tdp_table {
 	uint8_t  ucVr_I2C_Line;
 	uint8_t  ucPlx_I2C_address;
 	uint8_t  ucPlx_I2C_Line;
+	uint32_t usBoostPowerLimit;
+	uint8_t  ucCKS_LDO_REFSEL;
 };
 
 struct phm_ppm_table {
@@ -578,6 +584,7 @@ struct pp_hwmgr {
 	struct pp_smumgr *smumgr;
 	const void *soft_pp_table;
 	uint32_t soft_pp_table_size;
+	void *hardcode_pp_table;
 	bool need_pp_table_upload;
 	enum amd_dpm_forced_level dpm_level;
 	bool block_hw_access;
@@ -607,6 +614,7 @@ struct pp_hwmgr {
 	uint32_t num_ps;
 	struct pp_thermal_controller_info thermal_controller;
 	bool fan_ctrl_is_in_default_mode;
+	bool powercontainment_enabled;
 	uint32_t fan_ctrl_default_mode;
 	uint32_t tmin;
 	struct phm_microcode_version_info microcode_version_info;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index 0c6a413eaa5b..b8f4b73c322e 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -27,6 +27,7 @@
 
 #pragma pack(push, 1)
 
+#define PPSMC_MSG_SetGBDroopSettings          ((uint16_t) 0x305)
 
 #define PPSMC_SWSTATE_FLAG_DC                           0x01
 #define PPSMC_SWSTATE_FLAG_UVD                          0x02
@@ -391,6 +392,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetGpuPllDfsForSclk         ((uint16_t) 0x300)
 #define PPSMC_MSG_Didt_Block_Function		  ((uint16_t) 0x301)
 
+#define PPSMC_MSG_SetVBITimeout               ((uint16_t) 0x306)
+
 #define PPSMC_MSG_SecureSRBMWrite             ((uint16_t) 0x600)
 #define PPSMC_MSG_SecureSRBMRead              ((uint16_t) 0x601)
 #define PPSMC_MSG_SetAddress                  ((uint16_t) 0x800)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
index 3bd5e69b9045..3df5de2cdab0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
 extern int acpi_pcie_perf_request(void *device,
 						uint8_t perf_req,
 						bool advertise);
+extern bool acpi_atcs_notify_pcie_device_ready(void *device);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
index 1a12d85b8e97..fd10a9fa843d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
@@ -34,6 +34,30 @@
 #define SMU__NUM_LCLK_DPM_LEVELS 8
 #define SMU__NUM_PCIE_DPM_LEVELS 8
 
+#define EXP_M1  35
+#define EXP_M2  92821
+#define EXP_B   66629747
+
+#define EXP_M1_1  365
+#define EXP_M2_1  658700
+#define EXP_B_1   305506134
+
+#define EXP_M1_2  189
+#define EXP_M2_2  379692
+#define EXP_B_2   194609469
+
+#define EXP_M1_3  99
+#define EXP_M2_3  217915
+#define EXP_B_3   122255994
+
+#define EXP_M1_4  51
+#define EXP_M2_4  122643
+#define EXP_B_4   74893384
+
+#define EXP_M1_5  423
+#define EXP_M2_5  1103326
+#define EXP_B_5   728122621
+
 enum SID_OPTION {
 	SID_OPTION_HI,
 	SID_OPTION_LO,
@@ -548,20 +572,20 @@ struct SMU74_Firmware_Header {
 	uint32_t CacConfigTable;
 	uint32_t CacStatusTable;
 
-
 	uint32_t mcRegisterTable;
 
-
 	uint32_t mcArbDramTimingTable;
 
-
-
-
 	uint32_t PmFuseTable;
 	uint32_t Globals;
 	uint32_t ClockStretcherTable;
 	uint32_t VftTable;
-	uint32_t Reserved[21];
+	uint32_t Reserved1;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[16];
 	uint32_t Signature;
 };
 
@@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */
 struct SMU_ClockStretcherDataTableEntry {
 	uint8_t minVID;
 	uint8_t maxVID;
-
-
 	uint16_t setting;
 };
 typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
@@ -769,6 +791,43 @@ struct VFT_TABLE_t {
 typedef struct VFT_TABLE_t VFT_TABLE_t;
 
 
+/* Total margin, root mean square of Fmax + DC + Platform */
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index 0dfe82336dc7..899d6d8108c2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo {
 
 typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo;
 
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	uint32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
 struct SMU74_Discrete_DpmTable {
 
 	SMU74_PIDController                  GraphicsPIDController;
@@ -258,7 +268,15 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             ThermOutPolarity;
 	uint8_t                             ThermOutMode;
 	uint8_t                             BootPhases;
-	uint32_t                            Reserved[4];
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             LdoRefSel;
+	uint8_t                             Reserved1[2];
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved[1];
 
 	SMU74_Discrete_GraphicsLevel        GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS];
 	SMU74_Discrete_MemoryLevel          MemoryACPILevel;
@@ -347,6 +365,8 @@ struct SMU74_Discrete_DpmTable {
 
 	uint32_t                            CurrSclkPllRange;
 	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
 };
 
 typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable;
@@ -550,16 +570,6 @@ struct SMU7_AcpiScoreboard {
 
 typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
 
-struct SMU_QuadraticCoeffs {
-	int32_t m1;
-	uint32_t b;
-
-	int16_t m2;
-	uint8_t m1_shift;
-	uint8_t m2_shift;
-};
-typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
-
 struct SMU74_Discrete_PmFuses {
 	uint8_t BapmVddCVidHiSidd[8];
 	uint8_t BapmVddCVidLoSidd[8];
@@ -821,6 +831,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
 #define DB_PCC_SHIFT 26 
 #define DB_EDC_SHIFT 27
 
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+
 #pragma pack(pop)
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index fc9e3d1dd409..3c235f0177cd 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -131,6 +131,12 @@ extern int smu_free_memory(void *device, void *handle);
 	smum_wait_on_indirect_register(smumgr,				\
 				mm##port##_INDEX, index, value, mask)
 
+#define SMUM_WAIT_INDIRECT_REGISTER(smumgr, port, reg, value, mask)    \
+	    SMUM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(smumgr, port, ix##reg, value, mask)
+
+#define SMUM_WAIT_INDIRECT_FIELD(smumgr, port, reg, field, fieldval)                          \
+	    SMUM_WAIT_INDIRECT_REGISTER(smumgr, port, reg, (fieldval) << SMUM_FIELD_SHIFT(reg, field), \
+			            SMUM_FIELD_MASK(reg, field) )
 
 #define SMUM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr,         \
 							index, value, mask) \
@@ -158,6 +164,10 @@ extern int smu_free_memory(void *device, void *handle);
 		(SMUM_FIELD_MASK(reg, field) & ((field_val) <<                 \
 			SMUM_FIELD_SHIFT(reg, field))))
 
+#define SMUM_READ_INDIRECT_FIELD(device, port, reg, field) \
+	    SMUM_GET_FIELD(cgs_read_ind_register(device, port, ix##reg), \
+			   reg, field)
+
 #define SMUM_WAIT_VFPF_INDIRECT_REGISTER_GIVEN_INDEX(smumgr,		\
 				port, index, value, mask)		\
 	smum_wait_on_indirect_register(smumgr,				\
@@ -191,6 +201,13 @@ extern int smu_free_memory(void *device, void *handle);
 			SMUM_SET_FIELD(cgs_read_ind_register(device, port, ix##reg), \
 			reg, field, fieldval))
 
+
+#define SMUM_WRITE_INDIRECT_FIELD(device, port, reg, field, fieldval)    		\
+		cgs_write_ind_register(device, port, ix##reg, 				\
+			SMUM_SET_FIELD(cgs_read_ind_register(device, port, ix##reg), 	\
+				       reg, field, fieldval))
+
+
 #define SMUM_WAIT_VFPF_INDIRECT_FIELD(smumgr, port, reg, field, fieldval) \
 	SMUM_WAIT_VFPF_INDIRECT_REGISTER(smumgr, port, reg,		\
 		(fieldval) << SMUM_FIELD_SHIFT(reg, field),		\
@@ -200,4 +217,16 @@ extern int smu_free_memory(void *device, void *handle);
 	SMUM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg,	\
 		(fieldval) << SMUM_FIELD_SHIFT(reg, field),		\
 		SMUM_FIELD_MASK(reg, field))
+
+#define SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr, port, index, value, mask)    \
+	smum_wait_for_indirect_register_unequal(smumgr,			\
+		mm##port##_INDEX, index, value, mask)
+
+#define SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg, value, mask)    \
+	    SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr, port, ix##reg, value, mask)
+
+#define SMUM_WAIT_INDIRECT_FIELD_UNEQUAL(smumgr, port, reg, field, fieldval)                          \
+	    SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg, (fieldval) << SMUM_FIELD_SHIFT(reg, field), \
+			            SMUM_FIELD_MASK(reg, field) )
+
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index 673a75c74e18..8e52a2e82db5 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -1006,10 +1006,16 @@ static int fiji_smu_init(struct pp_smumgr *smumgr)
 
 static int fiji_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index de618ead9db8..5dba7c509710 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,19 +52,18 @@
 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = {
 	/*  Min      pcie   DeepSleep Activity  CgSpll      CgSpll    CcPwr  CcPwr  Sclk         Enabled      Enabled                       Voltage    Power */
 	/* Voltage, DpmLevel, DivId,  Level,  FuncCntl3,  FuncCntl4,  DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */
-	{ 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } }
+	{ 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } },
+	{ 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } },
+	{ 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } },
+	{ 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }
 };
 
 static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 =
-	{0x50140000, 0x50140000, 0x00320000, 0x00, 0x00,
-	 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00};
+	{0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00};
 
 /**
 * Set the address for reading/writing the SMC SRAM space.
@@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	&& (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C)));
 }
 
+static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4));
+	efuse &= 0x00000001;
+	if (efuse)
+		return true;
+
+	return false;
+}
+
 /**
 * Send a message to the SMC, and wait for its response.
 *
@@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 */
 int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
+	int ret;
+
 	if (!polaris10_is_smc_ram_running(smumgr))
 		return -1;
 
+
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Previous Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
 
+	if (ret != 1)
+		printk("\n failed to send pre message %x ret is %d \n",  msg, ret);
 
 	cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg);
 
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
+
+	if (ret != 1)
+		printk("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
 }
@@ -469,6 +486,7 @@ int polaris10_smu_fini(struct pp_smumgr *smumgr)
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
@@ -952,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr)
 		(cgs_handle_t)smu_data->smu_buffer.handle);
 		return -1;);
 
+	if (polaris10_is_hw_avfs_present(smumgr))
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT;
+	else
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index c483baf6b4fb..7723473e51a0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "pp_instance.h"
 #include "smumgr.h"
 #include "cgs_common.h"
@@ -52,10 +53,10 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	handle->smu_mgr = smumgr;
 
 	switch (smumgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_smum_init(smumgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (smumgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_smum_init(smumgr);
@@ -81,6 +82,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 
 int smum_fini(struct pp_smumgr *smumgr)
 {
+	kfree(smumgr->device);
 	kfree(smumgr);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 32820b680d88..f42c536b3af1 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -328,10 +328,17 @@ int tonga_write_smc_sram_dword(struct pp_smumgr *smumgr,
 
 static int tonga_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct tonga_smumgr *priv = (struct tonga_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->smu_buffer.handle);
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend != NULL) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
@@ -472,7 +479,6 @@ static int tonga_request_smu_reload_fw(struct pp_smumgr *smumgr)
 	struct tonga_smumgr *tonga_smu =
 		(struct tonga_smumgr *)(smumgr->backend);
 	uint16_t fw_to_load;
-	int result = 0;
 	struct SMU_DRAMData_TOC *toc;
 	/**
 	 * First time this gets called during SmuMgr init,
@@ -556,7 +562,7 @@ static int tonga_request_smu_reload_fw(struct pp_smumgr *smumgr)
 		smumgr, PPSMC_MSG_LoadUcodes, fw_to_load),
 		"Fail to Request SMU Load uCode", return 0);
 
-	return result;
+	return 0;
 }
 
 static int tonga_request_smu_load_specific_fw(struct pp_smumgr *smumgr,
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index c89dc777768f..b961a1c6caf3 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -26,7 +26,7 @@ TRACE_EVENT(amd_sched_job,
 	    TP_fast_assign(
 			   __entry->entity = sched_job->s_entity;
 			   __entry->sched_job = sched_job;
-			   __entry->fence = &sched_job->s_fence->base;
+			   __entry->fence = &sched_job->s_fence->finished;
 			   __entry->name = sched_job->sched->name;
 			   __entry->job_count = kfifo_len(
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
@@ -46,7 +46,7 @@ TRACE_EVENT(amd_sched_process_job,
 		    ),
 
 	    TP_fast_assign(
-		    __entry->fence = &fence->base;
+		    __entry->fence = &fence->finished;
 		    ),
 	    TP_printk("fence=%p signaled", __entry->fence)
 );
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index c16248cee779..ef312bb75fda 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -32,6 +32,7 @@
 
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
 
 struct kmem_cache *sched_fence_slab;
 atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
@@ -140,7 +141,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 		return r;
 
 	atomic_set(&entity->fence_seq, 0);
-	entity->fence_context = fence_context_alloc(1);
+	entity->fence_context = fence_context_alloc(2);
 
 	return 0;
 }
@@ -251,17 +252,21 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 
 	s_fence = to_amd_sched_fence(fence);
 	if (s_fence && s_fence->sched == sched) {
-		/* Fence is from the same scheduler */
-		if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
-			/* Ignore it when it is already scheduled */
-			fence_put(entity->dependency);
-			return false;
-		}
 
-		/* Wait for fence to be scheduled */
-		entity->cb.func = amd_sched_entity_clear_dep;
-		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
-		return true;
+		/*
+		 * Fence is from the same scheduler, only need to wait for
+		 * it to be scheduled
+		 */
+		fence = fence_get(&s_fence->scheduled);
+		fence_put(entity->dependency);
+		entity->dependency = fence;
+		if (!fence_add_callback(fence, &entity->cb,
+					amd_sched_entity_clear_dep))
+			return true;
+
+		/* Ignore it when it is already scheduled */
+		fence_put(fence);
+		return false;
 	}
 
 	if (!fence_add_callback(entity->dependency, &entity->cb,
@@ -319,46 +324,114 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 	return added;
 }
 
-static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) {
-	struct amd_sched_job *job = container_of(cb, struct amd_sched_job, cb_free_job);
-	schedule_work(&job->work_free_job);
-}
-
 /* job_finish is called after hw fence signaled, and
  * the job had already been deleted from ring_mirror_list
  */
-void amd_sched_job_finish(struct amd_sched_job *s_job)
+static void amd_sched_job_finish(struct work_struct *work)
 {
-	struct amd_sched_job *next;
+	struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
+						   finish_work);
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	/* remove job from ring_mirror_list */
+	spin_lock(&sched->job_list_lock);
+	list_del_init(&s_job->node);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
-		if (cancel_delayed_work(&s_job->work_tdr))
-			amd_sched_job_put(s_job);
+		struct amd_sched_job *next;
+
+		spin_unlock(&sched->job_list_lock);
+		cancel_delayed_work_sync(&s_job->work_tdr);
+		spin_lock(&sched->job_list_lock);
 
 		/* queue TDR for next job */
 		next = list_first_entry_or_null(&sched->ring_mirror_list,
 						struct amd_sched_job, node);
 
-		if (next) {
-			INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback);
-			amd_sched_job_get(next);
+		if (next)
 			schedule_delayed_work(&next->work_tdr, sched->timeout);
-		}
 	}
+	spin_unlock(&sched->job_list_lock);
+	sched->ops->free_job(s_job);
 }
 
-void amd_sched_job_begin(struct amd_sched_job *s_job)
+static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
+						 finish_cb);
+	schedule_work(&job->finish_work);
+}
+
+static void amd_sched_job_begin(struct amd_sched_job *s_job)
 {
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	spin_lock(&sched->job_list_lock);
+	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-		list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job)
-	{
-		INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback);
-		amd_sched_job_get(s_job);
+	    list_first_entry_or_null(&sched->ring_mirror_list,
+				     struct amd_sched_job, node) == s_job)
+		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+	spin_unlock(&sched->job_list_lock);
+}
+
+static void amd_sched_job_timedout(struct work_struct *work)
+{
+	struct amd_sched_job *job = container_of(work, struct amd_sched_job,
+						 work_tdr.work);
+
+	job->sched->ops->timedout_job(job);
+}
+
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job;
+
+	spin_lock(&sched->job_list_lock);
+	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+		if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
+			fence_put(s_job->s_fence->parent);
+			s_job->s_fence->parent = NULL;
+		}
+	}
+	atomic_set(&sched->hw_rq_count, 0);
+	spin_unlock(&sched->job_list_lock);
+}
+
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job, *tmp;
+	int r;
+
+	spin_lock(&sched->job_list_lock);
+	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
+					 struct amd_sched_job, node);
+	if (s_job)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		struct amd_sched_fence *s_fence = s_job->s_fence;
+		struct fence *fence;
+
+		spin_unlock(&sched->job_list_lock);
+		fence = sched->ops->run_job(s_job);
+		atomic_inc(&sched->hw_rq_count);
+		if (fence) {
+			s_fence->parent = fence_get(fence);
+			r = fence_add_callback(fence, &s_fence->cb,
+					       amd_sched_process_job);
+			if (r == -ENOENT)
+				amd_sched_process_job(fence, &s_fence->cb);
+			else if (r)
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
+			fence_put(fence);
+		} else {
+			DRM_ERROR("Failed to run job!\n");
+			amd_sched_process_job(NULL, &s_fence->cb);
+		}
+		spin_lock(&sched->job_list_lock);
 	}
+	spin_unlock(&sched->job_list_lock);
 }
 
 /**
@@ -372,36 +445,29 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
 	struct amd_sched_entity *entity = sched_job->s_entity;
 
-	sched_job->use_sched = 1;
-	fence_add_callback(&sched_job->s_fence->base,
-					&sched_job->cb_free_job, amd_sched_free_job);
 	trace_amd_sched_job(sched_job);
+	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
+			   amd_sched_job_finish_cb);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
 }
 
 /* init a sched_job with basic field */
 int amd_sched_job_init(struct amd_sched_job *job,
-						struct amd_gpu_scheduler *sched,
-						struct amd_sched_entity *entity,
-						void (*timeout_cb)(struct work_struct *work),
-						void (*free_cb)(struct kref *refcount),
-						void *owner, struct fence **fence)
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner)
 {
-	INIT_LIST_HEAD(&job->node);
-	kref_init(&job->refcount);
 	job->sched = sched;
 	job->s_entity = entity;
 	job->s_fence = amd_sched_fence_create(entity, owner);
 	if (!job->s_fence)
 		return -ENOMEM;
 
-	job->s_fence->s_job = job;
-	job->timeout_callback = timeout_cb;
-	job->free_callback = free_cb;
+	INIT_WORK(&job->finish_work, amd_sched_job_finish);
+	INIT_LIST_HEAD(&job->node);
+	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
 
-	if (fence)
-		*fence = &job->s_fence->base;
 	return 0;
 }
 
@@ -450,23 +516,25 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
-
-	/* remove job from ring_mirror_list */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_del_init(&s_fence->s_job->node);
-	sched->ops->finish_job(s_fence->s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	amd_sched_fence_signal(s_fence);
+	amd_sched_fence_finished(s_fence);
 
 	trace_amd_sched_process_job(s_fence);
-	fence_put(&s_fence->base);
+	fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
+{
+	if (kthread_should_park()) {
+		kthread_parkme();
+		return true;
+	}
+
+	return false;
+}
+
 static int amd_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
@@ -476,14 +544,15 @@ static int amd_sched_main(void *param)
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
-		struct amd_sched_entity *entity;
+		struct amd_sched_entity *entity = NULL;
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
-			(entity = amd_sched_select_entity(sched)) ||
-			kthread_should_stop());
+					 (!amd_sched_blocked(sched) &&
+					  (entity = amd_sched_select_entity(sched))) ||
+					 kthread_should_stop());
 
 		if (!entity)
 			continue;
@@ -495,16 +564,19 @@ static int amd_sched_main(void *param)
 		s_fence = sched_job->s_fence;
 
 		atomic_inc(&sched->hw_rq_count);
-		amd_sched_job_pre_schedule(sched, sched_job);
+		amd_sched_job_begin(sched_job);
+
 		fence = sched->ops->run_job(sched_job);
 		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
+			s_fence->parent = fence_get(fence);
 			r = fence_add_callback(fence, &s_fence->cb,
 					       amd_sched_process_job);
 			if (r == -ENOENT)
 				amd_sched_process_job(fence, &s_fence->cb);
 			else if (r)
-				DRM_ERROR("fence add callback failed (%d)\n", r);
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
 			fence_put(fence);
 		} else {
 			DRM_ERROR("Failed to run job!\n");
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 070095a9433c..7cbbbfb502ef 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
 #include <linux/kfifo.h>
 #include <linux/fence.h>
 
-#define AMD_SCHED_FENCE_SCHEDULED_BIT	FENCE_FLAG_USER_BITS
-
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
@@ -68,36 +66,34 @@ struct amd_sched_rq {
 };
 
 struct amd_sched_fence {
-	struct fence                    base;
+	struct fence                    scheduled;
+	struct fence                    finished;
 	struct fence_cb                 cb;
-	struct list_head		scheduled_cb;
+	struct fence                    *parent;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
-	struct amd_sched_job	*s_job;
 };
 
 struct amd_sched_job {
-	struct kref refcount;
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
 	struct amd_sched_fence          *s_fence;
-	bool	use_sched;	/* true if the job goes to scheduler */
-	struct fence_cb                cb_free_job;
-	struct work_struct             work_free_job;
-	struct list_head			   node;
-	struct delayed_work work_tdr;
-	void (*timeout_callback) (struct work_struct *work);
-	void (*free_callback)(struct kref *refcount);
+	struct fence_cb			finish_cb;
+	struct work_struct		finish_work;
+	struct list_head		node;
+	struct delayed_work		work_tdr;
 };
 
-extern const struct fence_ops amd_sched_fence_ops;
+extern const struct fence_ops amd_sched_fence_ops_scheduled;
+extern const struct fence_ops amd_sched_fence_ops_finished;
 static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 {
-	struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base);
+	if (f->ops == &amd_sched_fence_ops_scheduled)
+		return container_of(f, struct amd_sched_fence, scheduled);
 
-	if (__f->base.ops == &amd_sched_fence_ops)
-		return __f;
+	if (f->ops == &amd_sched_fence_ops_finished)
+		return container_of(f, struct amd_sched_fence, finished);
 
 	return NULL;
 }
@@ -109,8 +105,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 struct amd_sched_backend_ops {
 	struct fence *(*dependency)(struct amd_sched_job *sched_job);
 	struct fence *(*run_job)(struct amd_sched_job *sched_job);
-	void (*begin_job)(struct amd_sched_job *sched_job);
-	void (*finish_job)(struct amd_sched_job *sched_job);
+	void (*timedout_job)(struct amd_sched_job *sched_job);
+	void (*free_job)(struct amd_sched_job *sched_job);
 };
 
 enum amd_sched_priority {
@@ -152,25 +148,11 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 struct amd_sched_fence *amd_sched_fence_create(
 	struct amd_sched_entity *s_entity, void *owner);
 void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
-void amd_sched_fence_signal(struct amd_sched_fence *fence);
+void amd_sched_fence_finished(struct amd_sched_fence *fence);
 int amd_sched_job_init(struct amd_sched_job *job,
-					struct amd_gpu_scheduler *sched,
-					struct amd_sched_entity *entity,
-					void (*timeout_cb)(struct work_struct *work),
-					void (*free_cb)(struct kref* refcount),
-					void *owner, struct fence **fence);
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-								struct amd_sched_job *s_job);
-void amd_sched_job_finish(struct amd_sched_job *s_job);
-void amd_sched_job_begin(struct amd_sched_job *s_job);
-static inline void amd_sched_job_get(struct amd_sched_job *job) {
-	if (job)
-		kref_get(&job->refcount);
-}
-
-static inline void amd_sched_job_put(struct amd_sched_job *job) {
-	if (job)
-		kref_put(&job->refcount, job->free_callback);
-}
-
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 #endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 2a732c490375..6b63beaf7574 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,7 +27,8 @@
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
 
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
+struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+					       void *owner)
 {
 	struct amd_sched_fence *fence = NULL;
 	unsigned seq;
@@ -36,46 +37,37 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
 	if (fence == NULL)
 		return NULL;
 
-	INIT_LIST_HEAD(&fence->scheduled_cb);
 	fence->owner = owner;
-	fence->sched = s_entity->sched;
+	fence->sched = entity->sched;
 	spin_lock_init(&fence->lock);
 
-	seq = atomic_inc_return(&s_entity->fence_seq);
-	fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
-		   s_entity->fence_context, seq);
+	seq = atomic_inc_return(&entity->fence_seq);
+	fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
+		   &fence->lock, entity->fence_context, seq);
+	fence_init(&fence->finished, &amd_sched_fence_ops_finished,
+		   &fence->lock, entity->fence_context + 1, seq);
 
 	return fence;
 }
 
-void amd_sched_fence_signal(struct amd_sched_fence *fence)
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
 {
-	int ret = fence_signal(&fence->base);
+	int ret = fence_signal(&fence->scheduled);
+
 	if (!ret)
-		FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		FENCE_TRACE(&fence->scheduled, "signaled from irq context\n");
 	else
-		FENCE_TRACE(&fence->base, "was already signaled\n");
-}
-
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-				struct amd_sched_job *s_job)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_add_tail(&s_job->node, &sched->ring_mirror_list);
-	sched->ops->begin_job(s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		FENCE_TRACE(&fence->scheduled, "was already signaled\n");
 }
 
-void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+void amd_sched_fence_finished(struct amd_sched_fence *fence)
 {
-	struct fence_cb *cur, *tmp;
+	int ret = fence_signal(&fence->finished);
 
-	set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
-	list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
-		list_del_init(&cur->node);
-		cur->func(&s_fence->base, cur);
-	}
+	if (!ret)
+		FENCE_TRACE(&fence->finished, "signaled from irq context\n");
+	else
+		FENCE_TRACE(&fence->finished, "was already signaled\n");
 }
 
 static const char *amd_sched_fence_get_driver_name(struct fence *fence)
@@ -105,6 +97,8 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
 {
 	struct fence *f = container_of(rcu, struct fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
@@ -116,16 +110,41 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amd_sched_fence_release(struct fence *f)
+static void amd_sched_fence_release_scheduled(struct fence *f)
 {
-	call_rcu(&f->rcu, amd_sched_fence_free);
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	call_rcu(&fence->finished.rcu, amd_sched_fence_free);
 }
 
-const struct fence_ops amd_sched_fence_ops = {
+/**
+ * amd_sched_fence_release_scheduled - drop extra reference
+ *
+ * @f: fence
+ *
+ * Drop the extra reference from the scheduled fence to the base fence.
+ */
+static void amd_sched_fence_release_finished(struct fence *f)
+{
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(&fence->scheduled);
+}
+
+const struct fence_ops amd_sched_fence_ops_scheduled = {
+	.get_driver_name = amd_sched_fence_get_driver_name,
+	.get_timeline_name = amd_sched_fence_get_timeline_name,
+	.enable_signaling = amd_sched_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = amd_sched_fence_release_scheduled,
+};
+
+const struct fence_ops amd_sched_fence_ops_finished = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = fence_default_wait,
-	.release = amd_sched_fence_release,
+	.release = amd_sched_fence_release_finished,
 };
diff --git a/drivers/gpu/drm/arc/Kconfig b/drivers/gpu/drm/arc/Kconfig
index f9a13b658fea..f47d88ba4fa5 100644
--- a/drivers/gpu/drm/arc/Kconfig
+++ b/drivers/gpu/drm/arc/Kconfig
@@ -2,7 +2,6 @@ config DRM_ARCPGU
 	tristate "ARC PGU"
 	depends on DRM && OF
 	select DRM_KMS_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_HELPER
 	help
 	  Choose this option if you have an ARC PGU controller.
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index ccbdadb108dc..6d4ff34737cb 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -28,8 +28,7 @@ static void arcpgu_fb_output_poll_changed(struct drm_device *dev)
 {
 	struct arcpgu_drm_private *arcpgu = dev->dev_private;
 
-	if (arcpgu->fbdev)
-		drm_fbdev_cma_hotplug_event(arcpgu->fbdev);
+	drm_fbdev_cma_hotplug_event(arcpgu->fbdev);
 }
 
 static struct drm_mode_config_funcs arcpgu_drm_modecfg_funcs = {
@@ -49,7 +48,7 @@ static void arcpgu_setup_mode_config(struct drm_device *drm)
 	drm->mode_config.funcs = &arcpgu_drm_modecfg_funcs;
 }
 
-int arcpgu_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+static int arcpgu_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret;
 
@@ -104,10 +103,8 @@ static int arcpgu_load(struct drm_device *drm)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	arcpgu->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(arcpgu->regs)) {
-		dev_err(drm->dev, "Could not remap IO mem\n");
+	if (IS_ERR(arcpgu->regs))
 		return PTR_ERR(arcpgu->regs);
-	}
 
 	dev_info(drm->dev, "arc_pgu ID: 0x%x\n",
 		 arc_pgu_read(arcpgu, ARCPGU_REG_ID));
@@ -127,10 +124,11 @@ static int arcpgu_load(struct drm_device *drm)
 	encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0);
 	if (encoder_node) {
 		ret = arcpgu_drm_hdmi_init(drm, encoder_node);
+		of_node_put(encoder_node);
 		if (ret < 0)
 			return ret;
 	} else {
-		ret = arcpgu_drm_sim_init(drm, 0);
+		ret = arcpgu_drm_sim_init(drm, NULL);
 		if (ret < 0)
 			return ret;
 	}
@@ -151,7 +149,7 @@ static int arcpgu_load(struct drm_device *drm)
 	return 0;
 }
 
-int arcpgu_unload(struct drm_device *drm)
+static int arcpgu_unload(struct drm_device *drm)
 {
 	struct arcpgu_drm_private *arcpgu = drm->dev_private;
 
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
index 1b2906568a48..9a18e1bd57b4 100644
--- a/drivers/gpu/drm/arm/Kconfig
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -9,7 +9,6 @@ config DRM_HDLCD
 	depends on COMMON_CLK
 	select DRM_ARM
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	help
 	  Choose this option if you have an ARM High Definition Colour LCD
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 74279be20b75..d83b46a30327 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -102,8 +102,7 @@ static void hdlcd_fb_output_poll_changed(struct drm_device *drm)
 {
 	struct hdlcd_drm_private *hdlcd = drm->dev_private;
 
-	if (hdlcd->fbdev)
-		drm_fbdev_cma_hotplug_event(hdlcd->fbdev);
+	drm_fbdev_cma_hotplug_event(hdlcd->fbdev);
 }
 
 static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index e5b44e92f8cf..82171d223f2d 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -257,6 +257,7 @@ static int malidp_bind(struct device *dev)
 {
 	struct resource *res;
 	struct drm_device *drm;
+	struct device_node *ep;
 	struct malidp_drm *malidp;
 	struct malidp_hw_device *hwdev;
 	struct platform_device *pdev = to_platform_device(dev);
@@ -284,10 +285,8 @@ static int malidp_bind(struct device *dev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	hwdev->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(hwdev->regs)) {
-		DRM_ERROR("Failed to map control registers area\n");
+	if (IS_ERR(hwdev->regs))
 		return PTR_ERR(hwdev->regs);
-	}
 
 	hwdev->pclk = devm_clk_get(dev, "pclk");
 	if (IS_ERR(hwdev->pclk))
@@ -360,11 +359,14 @@ static int malidp_bind(struct device *dev)
 		goto register_fail;
 
 	/* Set the CRTC's port so that the encoder component can find it */
-	malidp->crtc.port = of_graph_get_next_endpoint(dev->of_node, NULL);
+	ep = of_graph_get_next_endpoint(dev->of_node, NULL);
+	if (!ep) {
+		ret = -EINVAL;
+		goto port_fail;
+	}
+	malidp->crtc.port = of_get_next_parent(ep);
 
 	ret = component_bind_all(dev, drm);
-	of_node_put(malidp->crtc.port);
-
 	if (ret) {
 		DRM_ERROR("Failed to bind all components\n");
 		goto bind_fail;
@@ -402,6 +404,9 @@ vblank_fail:
 irq_init_fail:
 	component_unbind_all(dev, drm);
 bind_fail:
+	of_node_put(malidp->crtc.port);
+	malidp->crtc.port = NULL;
+port_fail:
 	drm_dev_unregister(drm);
 register_fail:
 	malidp_de_planes_destroy(drm);
@@ -435,6 +440,8 @@ static void malidp_unbind(struct device *dev)
 	malidp_de_irq_fini(drm);
 	drm_vblank_cleanup(drm);
 	component_unbind_all(dev, drm);
+	of_node_put(malidp->crtc.port);
+	malidp->crtc.port = NULL;
 	drm_dev_unregister(drm);
 	malidp_de_planes_destroy(drm);
 	drm_mode_config_cleanup(drm);
diff --git a/drivers/gpu/drm/armada/Kconfig b/drivers/gpu/drm/armada/Kconfig
index eb773e9af313..15f3ecfb16f1 100644
--- a/drivers/gpu/drm/armada/Kconfig
+++ b/drivers/gpu/drm/armada/Kconfig
@@ -1,11 +1,7 @@
 config DRM_ARMADA
 	tristate "DRM support for Marvell Armada SoCs"
 	depends on DRM && HAVE_CLK && ARM
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	help
 	  Support the "LCD" controllers found on the Marvell Armada 510
 	  devices.  There are two controllers on the device, each controller
diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 34405e4a5d36..2f58e9e2a59c 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -410,7 +410,7 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat)
 		DRM_ERROR("graphics underflow on crtc %u\n", dcrtc->num);
 
 	if (stat & VSYNC_IRQ)
-		drm_handle_vblank(dcrtc->crtc.dev, dcrtc->num);
+		drm_crtc_handle_vblank(&dcrtc->crtc);
 
 	spin_lock(&dcrtc->irq_lock);
 	ovl_plane = dcrtc->plane;
diff --git a/drivers/gpu/drm/ast/Kconfig b/drivers/gpu/drm/ast/Kconfig
index 8a784c460c89..15f6ce7acb2a 100644
--- a/drivers/gpu/drm/ast/Kconfig
+++ b/drivers/gpu/drm/ast/Kconfig
@@ -2,11 +2,7 @@ config DRM_AST
 	tristate "AST server chips"
 	depends on DRM && PCI
 	select DRM_TTM
-	select FB_SYS_COPYAREA
-	select FB_SYS_FILLRECT
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 Say yes for experimental AST GPU driver. Do not enable
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 7bc3aa6dda8c..904beaa932d0 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -295,9 +295,8 @@ static int ast_get_dram_info(struct drm_device *dev)
 static void ast_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct ast_framebuffer *ast_fb = to_ast_framebuffer(fb);
-	if (ast_fb->obj)
-		drm_gem_object_unreference_unlocked(ast_fb->obj);
 
+	drm_gem_object_unreference_unlocked(ast_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 59f2f93b6f84..b29a41218fc9 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -186,17 +186,6 @@ static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *
 {
 }
 
-static int ast_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void ast_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver ast_bo_driver = {
 	.ttm_tt_unpopulate = ast_ttm_tt_unpopulate,
 	.init_mem_type = ast_bo_init_mem_type,
 	.evict_flags = ast_bo_evict_flags,
-	.move = ast_bo_move,
+	.move = NULL,
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/atmel-hlcdc/Kconfig b/drivers/gpu/drm/atmel-hlcdc/Kconfig
index 99b4f0698a30..32bcc4bad06a 100644
--- a/drivers/gpu/drm/atmel-hlcdc/Kconfig
+++ b/drivers/gpu/drm/atmel-hlcdc/Kconfig
@@ -3,7 +3,6 @@ config DRM_ATMEL_HLCDC
 	depends on DRM && OF && COMMON_CLK && MFD_ATMEL_HLCDC && ARM
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_PANEL
 	help
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 613f6c99b76a..a978381ef95b 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -383,7 +383,7 @@ static void atmel_hlcdc_crtc_finish_page_flip(struct atmel_hlcdc_crtc *crtc)
 
 void atmel_hlcdc_crtc_irq(struct drm_crtc *c)
 {
-	drm_handle_vblank(c->dev, 0);
+	drm_crtc_handle_vblank(c);
 	atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c));
 }
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 473a475f27b1..6119b5085501 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -254,9 +254,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_check_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
@@ -264,9 +265,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index aef3ca8a81fa..016c191221f3 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
 
 		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
 					     factor_reg);
+	} else {
+		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig
index 5f8b0c2b9a44..f739763f47ce 100644
--- a/drivers/gpu/drm/bochs/Kconfig
+++ b/drivers/gpu/drm/bochs/Kconfig
@@ -2,10 +2,6 @@ config DRM_BOCHS
 	tristate "DRM Support for bochs dispi vga interface (qemu stdvga)"
 	depends on DRM && PCI
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_TTM
 	help
 	  Choose this option for qemu.
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 6cf912c45e48..5c5638a777a1 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -165,15 +165,6 @@ static void bochs_ttm_io_mem_free(struct ttm_bo_device *bdev,
 {
 }
 
-static int bochs_bo_move(struct ttm_buffer_object *bo,
-			 bool evict, bool interruptible,
-			 bool no_wait_gpu,
-			 struct ttm_mem_reg *new_mem)
-{
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-}
-
-
 static void bochs_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -208,7 +199,7 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.ttm_tt_unpopulate = ttm_pool_unpopulate,
 	.init_mem_type = bochs_bo_init_mem_type,
 	.evict_flags = bochs_bo_evict_flags,
-	.move = bochs_bo_move,
+	.move = NULL,
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
@@ -474,8 +465,8 @@ int bochs_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
 static void bochs_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct bochs_framebuffer *bochs_fb = to_bochs_framebuffer(fb);
-	if (bochs_fb->obj)
-		drm_gem_object_unreference_unlocked(bochs_fb->obj);
+
+	drm_gem_object_unreference_unlocked(bochs_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index a141921445f4..b590e678052d 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -58,6 +58,17 @@ config DRM_SII902X
 	---help---
 	  Silicon Image sii902x bridge chip driver.
 
+config DRM_TOSHIBA_TC358767
+	tristate "Toshiba TC358767 eDP bridge"
+	depends on OF
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	select DRM_PANEL
+	---help---
+	  Toshiba TC358767 eDP bridge chip driver.
+
 source "drivers/gpu/drm/bridge/analogix/Kconfig"
 
+source "drivers/gpu/drm/bridge/adv7511/Kconfig"
+
 endmenu
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index bfec9f8cb9d2..efdb07e878f5 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -6,4 +6,6 @@ obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o
 obj-$(CONFIG_DRM_NXP_PTN3460) += nxp-ptn3460.o
 obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o
 obj-$(CONFIG_DRM_SII902X) += sii902x.o
+obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o
 obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/
+obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/
diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig
new file mode 100644
index 000000000000..d2b0499ab7d7
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/Kconfig
@@ -0,0 +1,15 @@
+config DRM_I2C_ADV7511
+	tristate "AV7511 encoder"
+	depends on OF
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	help
+	  Support for the Analog Device ADV7511(W) and ADV7513 HDMI encoders.
+
+config DRM_I2C_ADV7533
+	bool "ADV7533 encoder"
+	depends on DRM_I2C_ADV7511
+	select DRM_MIPI_DSI
+	default y
+	help
+	  Support for the Analog Devices ADV7533 DSI to HDMI encoder.
diff --git a/drivers/gpu/drm/bridge/adv7511/Makefile b/drivers/gpu/drm/bridge/adv7511/Makefile
new file mode 100644
index 000000000000..9019327fff4c
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/Makefile
@@ -0,0 +1,3 @@
+adv7511-y := adv7511_drv.o
+adv7511-$(CONFIG_DRM_I2C_ADV7533) += adv7533.o
+obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o
diff --git a/drivers/gpu/drm/i2c/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h
index 38515b30cedf..161c923d6162 100644
--- a/drivers/gpu/drm/i2c/adv7511.h
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h
@@ -10,6 +10,11 @@
 #define __DRM_I2C_ADV7511_H__
 
 #include <linux/hdmi.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_mipi_dsi.h>
 
 #define ADV7511_REG_CHIP_REVISION		0x00
 #define ADV7511_REG_N0				0x01
@@ -286,4 +291,102 @@ struct adv7511_video_config {
 	struct hdmi_avi_infoframe avi_infoframe;
 };
 
+enum adv7511_type {
+	ADV7511,
+	ADV7533,
+};
+
+struct adv7511 {
+	struct i2c_client *i2c_main;
+	struct i2c_client *i2c_edid;
+	struct i2c_client *i2c_cec;
+
+	struct regmap *regmap;
+	struct regmap *regmap_cec;
+	enum drm_connector_status status;
+	bool powered;
+
+	struct drm_display_mode curr_mode;
+
+	unsigned int f_tmds;
+
+	unsigned int current_edid_segment;
+	uint8_t edid_buf[256];
+	bool edid_read;
+
+	wait_queue_head_t wq;
+	struct drm_bridge bridge;
+	struct drm_connector connector;
+
+	bool embedded_sync;
+	enum adv7511_sync_polarity vsync_polarity;
+	enum adv7511_sync_polarity hsync_polarity;
+	bool rgb;
+
+	struct edid *edid;
+
+	struct gpio_desc *gpio_pd;
+
+	/* ADV7533 DSI RX related params */
+	struct device_node *host_node;
+	struct mipi_dsi_device *dsi;
+	u8 num_dsi_lanes;
+	bool use_timing_gen;
+
+	enum adv7511_type type;
+};
+
+#ifdef CONFIG_DRM_I2C_ADV7533
+void adv7533_dsi_power_on(struct adv7511 *adv);
+void adv7533_dsi_power_off(struct adv7511 *adv);
+void adv7533_mode_set(struct adv7511 *adv, struct drm_display_mode *mode);
+int adv7533_patch_registers(struct adv7511 *adv);
+void adv7533_uninit_cec(struct adv7511 *adv);
+int adv7533_init_cec(struct adv7511 *adv);
+int adv7533_attach_dsi(struct adv7511 *adv);
+void adv7533_detach_dsi(struct adv7511 *adv);
+int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv);
+#else
+static inline void adv7533_dsi_power_on(struct adv7511 *adv)
+{
+}
+
+static inline void adv7533_dsi_power_off(struct adv7511 *adv)
+{
+}
+
+static inline void adv7533_mode_set(struct adv7511 *adv,
+				    struct drm_display_mode *mode)
+{
+}
+
+static inline int adv7533_patch_registers(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline void adv7533_uninit_cec(struct adv7511 *adv)
+{
+}
+
+static inline int adv7533_init_cec(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline int adv7533_attach_dsi(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline void adv7533_detach_dsi(struct adv7511 *adv)
+{
+}
+
+static inline int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+#endif
+
 #endif /* __DRM_I2C_ADV7511_H__ */
diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index a02112ba1c3d..ec8fb2ed3275 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -8,51 +8,17 @@
 
 #include <linux/device.h>
 #include <linux/gpio/consumer.h>
-#include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/regmap.h>
+#include <linux/of_device.h>
 #include <linux/slab.h>
 
 #include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_encoder_slave.h>
 
 #include "adv7511.h"
 
-struct adv7511 {
-	struct i2c_client *i2c_main;
-	struct i2c_client *i2c_edid;
-
-	struct regmap *regmap;
-	struct regmap *packet_memory_regmap;
-	enum drm_connector_status status;
-	bool powered;
-
-	unsigned int f_tmds;
-
-	unsigned int current_edid_segment;
-	uint8_t edid_buf[256];
-	bool edid_read;
-
-	wait_queue_head_t wq;
-	struct drm_encoder *encoder;
-
-	bool embedded_sync;
-	enum adv7511_sync_polarity vsync_polarity;
-	enum adv7511_sync_polarity hsync_polarity;
-	bool rgb;
-
-	struct edid *edid;
-
-	struct gpio_desc *gpio_pd;
-};
-
-static struct adv7511 *encoder_to_adv7511(struct drm_encoder *encoder)
-{
-	return to_encoder_slave(encoder)->slave_priv;
-}
-
 /* ADI recommended values for proper operation. */
 static const struct reg_sequence adv7511_fixed_registers[] = {
 	{ 0x98, 0x03 },
@@ -394,6 +360,9 @@ static void adv7511_power_on(struct adv7511 *adv7511)
 	 */
 	regcache_sync(adv7511->regmap);
 
+	if (adv7511->type == ADV7533)
+		adv7533_dsi_power_on(adv7511);
+
 	adv7511->powered = true;
 }
 
@@ -405,6 +374,9 @@ static void adv7511_power_off(struct adv7511 *adv7511)
 			   ADV7511_POWER_POWER_DOWN);
 	regcache_mark_dirty(adv7511->regmap);
 
+	if (adv7511->type == ADV7533)
+		adv7533_dsi_power_off(adv7511);
+
 	adv7511->powered = false;
 }
 
@@ -430,7 +402,7 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
 	return false;
 }
 
-static int adv7511_irq_process(struct adv7511 *adv7511)
+static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd)
 {
 	unsigned int irq0, irq1;
 	int ret;
@@ -446,8 +418,8 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-	if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
-		drm_helper_hpd_irq_event(adv7511->encoder->dev);
+	if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder)
+		drm_helper_hpd_irq_event(adv7511->connector.dev);
 
 	if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
 		adv7511->edid_read = true;
@@ -464,7 +436,7 @@ static irqreturn_t adv7511_irq_handler(int irq, void *devid)
 	struct adv7511 *adv7511 = devid;
 	int ret;
 
-	ret = adv7511_irq_process(adv7511);
+	ret = adv7511_irq_process(adv7511, true);
 	return ret < 0 ? IRQ_NONE : IRQ_HANDLED;
 }
 
@@ -481,7 +453,7 @@ static int adv7511_wait_for_edid(struct adv7511 *adv7511, int timeout)
 				adv7511->edid_read, msecs_to_jiffies(timeout));
 	} else {
 		for (; timeout > 0; timeout -= 25) {
-			ret = adv7511_irq_process(adv7511);
+			ret = adv7511_irq_process(adv7511, false);
 			if (ret < 0)
 				break;
 
@@ -563,13 +535,12 @@ static int adv7511_get_edid_block(void *data, u8 *buf, unsigned int block,
 }
 
 /* -----------------------------------------------------------------------------
- * Encoder operations
+ * ADV75xx helpers
  */
 
-static int adv7511_get_modes(struct drm_encoder *encoder,
+static int adv7511_get_modes(struct adv7511 *adv7511,
 			     struct drm_connector *connector)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	struct edid *edid;
 	unsigned int count;
 
@@ -606,21 +577,9 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
 	return count;
 }
 
-static void adv7511_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
-
-	if (mode == DRM_MODE_DPMS_ON)
-		adv7511_power_on(adv7511);
-	else
-		adv7511_power_off(adv7511);
-}
-
 static enum drm_connector_status
-adv7511_encoder_detect(struct drm_encoder *encoder,
-		       struct drm_connector *connector)
+adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	enum drm_connector_status status;
 	unsigned int val;
 	bool hpd;
@@ -644,7 +603,7 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
 	if (status == connector_status_connected && hpd && adv7511->powered) {
 		regcache_mark_dirty(adv7511->regmap);
 		adv7511_power_on(adv7511);
-		adv7511_get_modes(encoder, connector);
+		adv7511_get_modes(adv7511, connector);
 		if (adv7511->status == connector_status_connected)
 			status = connector_status_disconnected;
 	} else {
@@ -658,8 +617,8 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
 	return status;
 }
 
-static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
-				      struct drm_display_mode *mode)
+static int adv7511_mode_valid(struct adv7511 *adv7511,
+			      struct drm_display_mode *mode)
 {
 	if (mode->clock > 165000)
 		return MODE_CLOCK_HIGH;
@@ -667,11 +626,10 @@ static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
 	return MODE_OK;
 }
 
-static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
-				     struct drm_display_mode *mode,
-				     struct drm_display_mode *adj_mode)
+static void adv7511_mode_set(struct adv7511 *adv7511,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adj_mode)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	unsigned int low_refresh_rate;
 	unsigned int hsync_polarity = 0;
 	unsigned int vsync_polarity = 0;
@@ -754,6 +712,11 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
 	regmap_update_bits(adv7511->regmap, 0x17,
 		0x60, (vsync_polarity << 6) | (hsync_polarity << 5));
 
+	if (adv7511->type == ADV7533)
+		adv7533_mode_set(adv7511, adj_mode);
+
+	drm_mode_copy(&adv7511->curr_mode, adj_mode);
+
 	/*
 	 * TODO Test first order 4:2:2 to 4:4:4 up conversion method, which is
 	 * supposed to give better results.
@@ -762,12 +725,114 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
 	adv7511->f_tmds = mode->clock;
 }
 
-static const struct drm_encoder_slave_funcs adv7511_encoder_funcs = {
-	.dpms = adv7511_encoder_dpms,
-	.mode_valid = adv7511_encoder_mode_valid,
-	.mode_set = adv7511_encoder_mode_set,
-	.detect = adv7511_encoder_detect,
-	.get_modes = adv7511_get_modes,
+/* Connector funcs */
+static struct adv7511 *connector_to_adv7511(struct drm_connector *connector)
+{
+	return container_of(connector, struct adv7511, connector);
+}
+
+static int adv7511_connector_get_modes(struct drm_connector *connector)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_get_modes(adv, connector);
+}
+
+static enum drm_mode_status
+adv7511_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_mode_valid(adv, mode);
+}
+
+static struct drm_connector_helper_funcs adv7511_connector_helper_funcs = {
+	.get_modes = adv7511_connector_get_modes,
+	.mode_valid = adv7511_connector_mode_valid,
+};
+
+static enum drm_connector_status
+adv7511_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_detect(adv, connector);
+}
+
+static struct drm_connector_funcs adv7511_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.detect = adv7511_connector_detect,
+	.destroy = drm_connector_cleanup,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+/* Bridge funcs */
+static struct adv7511 *bridge_to_adv7511(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct adv7511, bridge);
+}
+
+static void adv7511_bridge_enable(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_power_on(adv);
+}
+
+static void adv7511_bridge_disable(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_power_off(adv);
+}
+
+static void adv7511_bridge_mode_set(struct drm_bridge *bridge,
+				    struct drm_display_mode *mode,
+				    struct drm_display_mode *adj_mode)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_mode_set(adv, mode, adj_mode);
+}
+
+static int adv7511_bridge_attach(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+	int ret;
+
+	if (!bridge->encoder) {
+		DRM_ERROR("Parent encoder object not found");
+		return -ENODEV;
+	}
+
+	adv->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+	ret = drm_connector_init(bridge->dev, &adv->connector,
+				 &adv7511_connector_funcs,
+				 DRM_MODE_CONNECTOR_HDMIA);
+	if (ret) {
+		DRM_ERROR("Failed to initialize connector with drm\n");
+		return ret;
+	}
+	drm_connector_helper_add(&adv->connector,
+				 &adv7511_connector_helper_funcs);
+	drm_mode_connector_attach_encoder(&adv->connector, bridge->encoder);
+
+	if (adv->type == ADV7533)
+		ret = adv7533_attach_dsi(adv);
+
+	return ret;
+}
+
+static struct drm_bridge_funcs adv7511_bridge_funcs = {
+	.enable = adv7511_bridge_enable,
+	.disable = adv7511_bridge_disable,
+	.mode_set = adv7511_bridge_mode_set,
+	.attach = adv7511_bridge_attach,
 };
 
 /* -----------------------------------------------------------------------------
@@ -780,8 +845,6 @@ static int adv7511_parse_dt(struct device_node *np,
 	const char *str;
 	int ret;
 
-	memset(config, 0, sizeof(*config));
-
 	of_property_read_u32(np, "adi,input-depth", &config->input_color_depth);
 	if (config->input_color_depth != 8 && config->input_color_depth != 10 &&
 	    config->input_color_depth != 12)
@@ -881,7 +944,17 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 	adv7511->powered = false;
 	adv7511->status = connector_status_disconnected;
 
-	ret = adv7511_parse_dt(dev->of_node, &link_config);
+	if (dev->of_node)
+		adv7511->type = (enum adv7511_type)of_device_get_match_data(dev);
+	else
+		adv7511->type = id->driver_data;
+
+	memset(&link_config, 0, sizeof(link_config));
+
+	if (adv7511->type == ADV7511)
+		ret = adv7511_parse_dt(dev->of_node, &link_config);
+	else
+		ret = adv7533_parse_dt(dev->of_node, adv7511);
 	if (ret)
 		return ret;
 
@@ -907,8 +980,12 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 		return ret;
 	dev_dbg(dev, "Rev. %d\n", val);
 
-	ret = regmap_register_patch(adv7511->regmap, adv7511_fixed_registers,
-				    ARRAY_SIZE(adv7511_fixed_registers));
+	if (adv7511->type == ADV7511)
+		ret = regmap_register_patch(adv7511->regmap,
+					    adv7511_fixed_registers,
+					    ARRAY_SIZE(adv7511_fixed_registers));
+	else
+		ret = adv7533_patch_registers(adv7511);
 	if (ret)
 		return ret;
 
@@ -923,6 +1000,12 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 	if (!adv7511->i2c_edid)
 		return -ENOMEM;
 
+	if (adv7511->type == ADV7533) {
+		ret = adv7533_init_cec(adv7511);
+		if (ret)
+			goto err_i2c_unregister_edid;
+	}
+
 	if (i2c->irq) {
 		init_waitqueue_head(&adv7511->wq);
 
@@ -931,7 +1014,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 						IRQF_ONESHOT, dev_name(dev),
 						adv7511);
 		if (ret)
-			goto err_i2c_unregister_device;
+			goto err_unregister_cec;
 	}
 
 	/* CEC is unused for now */
@@ -942,11 +1025,23 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
 	i2c_set_clientdata(i2c, adv7511);
 
-	adv7511_set_link_config(adv7511, &link_config);
+	if (adv7511->type == ADV7511)
+		adv7511_set_link_config(adv7511, &link_config);
+
+	adv7511->bridge.funcs = &adv7511_bridge_funcs;
+	adv7511->bridge.of_node = dev->of_node;
+
+	ret = drm_bridge_add(&adv7511->bridge);
+	if (ret) {
+		dev_err(dev, "failed to add adv7511 bridge\n");
+		goto err_unregister_cec;
+	}
 
 	return 0;
 
-err_i2c_unregister_device:
+err_unregister_cec:
+	adv7533_uninit_cec(adv7511);
+err_i2c_unregister_edid:
 	i2c_unregister_device(adv7511->i2c_edid);
 
 	return ret;
@@ -956,66 +1051,71 @@ static int adv7511_remove(struct i2c_client *i2c)
 {
 	struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
 
-	i2c_unregister_device(adv7511->i2c_edid);
-
-	kfree(adv7511->edid);
-
-	return 0;
-}
-
-static int adv7511_encoder_init(struct i2c_client *i2c, struct drm_device *dev,
-				struct drm_encoder_slave *encoder)
-{
+	if (adv7511->type == ADV7533) {
+		adv7533_detach_dsi(adv7511);
+		adv7533_uninit_cec(adv7511);
+	}
 
-	struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
+	drm_bridge_remove(&adv7511->bridge);
 
-	encoder->slave_priv = adv7511;
-	encoder->slave_funcs = &adv7511_encoder_funcs;
+	i2c_unregister_device(adv7511->i2c_edid);
 
-	adv7511->encoder = &encoder->base;
+	kfree(adv7511->edid);
 
 	return 0;
 }
 
 static const struct i2c_device_id adv7511_i2c_ids[] = {
-	{ "adv7511", 0 },
-	{ "adv7511w", 0 },
-	{ "adv7513", 0 },
+	{ "adv7511", ADV7511 },
+	{ "adv7511w", ADV7511 },
+	{ "adv7513", ADV7511 },
+#ifdef CONFIG_DRM_I2C_ADV7533
+	{ "adv7533", ADV7533 },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adv7511_i2c_ids);
 
 static const struct of_device_id adv7511_of_ids[] = {
-	{ .compatible = "adi,adv7511", },
-	{ .compatible = "adi,adv7511w", },
-	{ .compatible = "adi,adv7513", },
+	{ .compatible = "adi,adv7511", .data = (void *)ADV7511 },
+	{ .compatible = "adi,adv7511w", .data = (void *)ADV7511 },
+	{ .compatible = "adi,adv7513", .data = (void *)ADV7511 },
+#ifdef CONFIG_DRM_I2C_ADV7533
+	{ .compatible = "adi,adv7533", .data = (void *)ADV7533 },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(of, adv7511_of_ids);
 
-static struct drm_i2c_encoder_driver adv7511_driver = {
-	.i2c_driver = {
-		.driver = {
-			.name = "adv7511",
-			.of_match_table = adv7511_of_ids,
-		},
-		.id_table = adv7511_i2c_ids,
-		.probe = adv7511_probe,
-		.remove = adv7511_remove,
-	},
+static struct mipi_dsi_driver adv7533_dsi_driver = {
+	.driver.name = "adv7533",
+};
 
-	.encoder_init = adv7511_encoder_init,
+static struct i2c_driver adv7511_driver = {
+	.driver = {
+		.name = "adv7511",
+		.of_match_table = adv7511_of_ids,
+	},
+	.id_table = adv7511_i2c_ids,
+	.probe = adv7511_probe,
+	.remove = adv7511_remove,
 };
 
 static int __init adv7511_init(void)
 {
-	return drm_i2c_encoder_register(THIS_MODULE, &adv7511_driver);
+	if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
+		mipi_dsi_driver_register(&adv7533_dsi_driver);
+
+	return i2c_add_driver(&adv7511_driver);
 }
 module_init(adv7511_init);
 
 static void __exit adv7511_exit(void)
 {
-	drm_i2c_encoder_unregister(&adv7511_driver);
+	i2c_del_driver(&adv7511_driver);
+
+	if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
+		mipi_dsi_driver_unregister(&adv7533_dsi_driver);
 }
 module_exit(adv7511_exit);
 
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c
new file mode 100644
index 000000000000..5eebd15899b1
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of_graph.h>
+
+#include "adv7511.h"
+
+static const struct reg_sequence adv7533_fixed_registers[] = {
+	{ 0x16, 0x20 },
+	{ 0x9a, 0xe0 },
+	{ 0xba, 0x70 },
+	{ 0xde, 0x82 },
+	{ 0xe4, 0x40 },
+	{ 0xe5, 0x80 },
+};
+
+static const struct reg_sequence adv7533_cec_fixed_registers[] = {
+	{ 0x15, 0xd0 },
+	{ 0x17, 0xd0 },
+	{ 0x24, 0x20 },
+	{ 0x57, 0x11 },
+};
+
+static const struct regmap_config adv7533_cec_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = 0xff,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static void adv7511_dsi_config_timing_gen(struct adv7511 *adv)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+	struct drm_display_mode *mode = &adv->curr_mode;
+	unsigned int hsw, hfp, hbp, vsw, vfp, vbp;
+	u8 clock_div_by_lanes[] = { 6, 4, 3 };	/* 2, 3, 4 lanes */
+
+	hsw = mode->hsync_end - mode->hsync_start;
+	hfp = mode->hsync_start - mode->hdisplay;
+	hbp = mode->htotal - mode->hsync_end;
+	vsw = mode->vsync_end - mode->vsync_start;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	/* set pixel clock divider mode */
+	regmap_write(adv->regmap_cec, 0x16,
+		     clock_div_by_lanes[dsi->lanes - 2] << 3);
+
+	/* horizontal porch params */
+	regmap_write(adv->regmap_cec, 0x28, mode->htotal >> 4);
+	regmap_write(adv->regmap_cec, 0x29, (mode->htotal << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2a, hsw >> 4);
+	regmap_write(adv->regmap_cec, 0x2b, (hsw << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2c, hfp >> 4);
+	regmap_write(adv->regmap_cec, 0x2d, (hfp << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2e, hbp >> 4);
+	regmap_write(adv->regmap_cec, 0x2f, (hbp << 4) & 0xff);
+
+	/* vertical porch params */
+	regmap_write(adv->regmap_cec, 0x30, mode->vtotal >> 4);
+	regmap_write(adv->regmap_cec, 0x31, (mode->vtotal << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x32, vsw >> 4);
+	regmap_write(adv->regmap_cec, 0x33, (vsw << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x34, vfp >> 4);
+	regmap_write(adv->regmap_cec, 0x35, (vfp << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x36, vbp >> 4);
+	regmap_write(adv->regmap_cec, 0x37, (vbp << 4) & 0xff);
+}
+
+void adv7533_dsi_power_on(struct adv7511 *adv)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+
+	if (adv->use_timing_gen)
+		adv7511_dsi_config_timing_gen(adv);
+
+	/* set number of dsi lanes */
+	regmap_write(adv->regmap_cec, 0x1c, dsi->lanes << 4);
+
+	if (adv->use_timing_gen) {
+		/* reset internal timing generator */
+		regmap_write(adv->regmap_cec, 0x27, 0xcb);
+		regmap_write(adv->regmap_cec, 0x27, 0x8b);
+		regmap_write(adv->regmap_cec, 0x27, 0xcb);
+	} else {
+		/* disable internal timing generator */
+		regmap_write(adv->regmap_cec, 0x27, 0x0b);
+	}
+
+	/* enable hdmi */
+	regmap_write(adv->regmap_cec, 0x03, 0x89);
+	/* disable test mode */
+	regmap_write(adv->regmap_cec, 0x55, 0x00);
+
+	regmap_register_patch(adv->regmap_cec, adv7533_cec_fixed_registers,
+			      ARRAY_SIZE(adv7533_cec_fixed_registers));
+}
+
+void adv7533_dsi_power_off(struct adv7511 *adv)
+{
+	/* disable hdmi */
+	regmap_write(adv->regmap_cec, 0x03, 0x0b);
+	/* disable internal timing generator */
+	regmap_write(adv->regmap_cec, 0x27, 0x0b);
+}
+
+void adv7533_mode_set(struct adv7511 *adv, struct drm_display_mode *mode)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+	int lanes, ret;
+
+	if (adv->num_dsi_lanes != 4)
+		return;
+
+	if (mode->clock > 80000)
+		lanes = 4;
+	else
+		lanes = 3;
+
+	if (lanes != dsi->lanes) {
+		mipi_dsi_detach(dsi);
+		dsi->lanes = lanes;
+		ret = mipi_dsi_attach(dsi);
+		if (ret)
+			dev_err(&dsi->dev, "failed to change host lanes\n");
+	}
+}
+
+int adv7533_patch_registers(struct adv7511 *adv)
+{
+	return regmap_register_patch(adv->regmap,
+				     adv7533_fixed_registers,
+				     ARRAY_SIZE(adv7533_fixed_registers));
+}
+
+void adv7533_uninit_cec(struct adv7511 *adv)
+{
+	i2c_unregister_device(adv->i2c_cec);
+}
+
+static const int cec_i2c_addr = 0x78;
+
+int adv7533_init_cec(struct adv7511 *adv)
+{
+	int ret;
+
+	adv->i2c_cec = i2c_new_dummy(adv->i2c_main->adapter, cec_i2c_addr >> 1);
+	if (!adv->i2c_cec)
+		return -ENOMEM;
+
+	adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
+					&adv7533_cec_regmap_config);
+	if (IS_ERR(adv->regmap_cec)) {
+		ret = PTR_ERR(adv->regmap_cec);
+		goto err;
+	}
+
+	ret = regmap_register_patch(adv->regmap_cec,
+				    adv7533_cec_fixed_registers,
+				    ARRAY_SIZE(adv7533_cec_fixed_registers));
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	adv7533_uninit_cec(adv);
+	return ret;
+}
+
+int adv7533_attach_dsi(struct adv7511 *adv)
+{
+	struct device *dev = &adv->i2c_main->dev;
+	struct mipi_dsi_host *host;
+	struct mipi_dsi_device *dsi;
+	int ret = 0;
+	const struct mipi_dsi_device_info info = { .type = "adv7533",
+						   .channel = 0,
+						   .node = NULL,
+						 };
+
+	host = of_find_mipi_dsi_host_by_node(adv->host_node);
+	if (!host) {
+		dev_err(dev, "failed to find dsi host\n");
+		return -EPROBE_DEFER;
+	}
+
+	dsi = mipi_dsi_device_register_full(host, &info);
+	if (IS_ERR(dsi)) {
+		dev_err(dev, "failed to create dsi device\n");
+		ret = PTR_ERR(dsi);
+		goto err_dsi_device;
+	}
+
+	adv->dsi = dsi;
+
+	dsi->lanes = adv->num_dsi_lanes;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+			  MIPI_DSI_MODE_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "failed to attach dsi to host\n");
+		goto err_dsi_attach;
+	}
+
+	return 0;
+
+err_dsi_attach:
+	mipi_dsi_device_unregister(dsi);
+err_dsi_device:
+	return ret;
+}
+
+void adv7533_detach_dsi(struct adv7511 *adv)
+{
+	mipi_dsi_detach(adv->dsi);
+	mipi_dsi_device_unregister(adv->dsi);
+}
+
+int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
+{
+	u32 num_lanes;
+	struct device_node *endpoint;
+
+	of_property_read_u32(np, "adi,dsi-lanes", &num_lanes);
+
+	if (num_lanes < 1 || num_lanes > 4)
+		return -EINVAL;
+
+	adv->num_dsi_lanes = num_lanes;
+
+	endpoint = of_graph_get_next_endpoint(np, NULL);
+	if (!endpoint)
+		return -ENODEV;
+
+	adv->host_node = of_graph_get_remote_port_parent(endpoint);
+	if (!adv->host_node) {
+		of_node_put(endpoint);
+		return -ENODEV;
+	}
+
+	of_node_put(endpoint);
+	of_node_put(adv->host_node);
+
+	adv->use_timing_gen = !of_property_read_bool(np,
+						"adi,disable-timing-generator");
+
+	/* TODO: Check if these need to be parsed by DT or not */
+	adv->rgb = true;
+	adv->embedded_sync = false;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 7699597070a1..32715daf73cb 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -938,7 +938,7 @@ int analogix_dp_get_modes(struct drm_connector *connector)
 		num_modes += drm_panel_get_modes(dp->plat_data->panel);
 
 	if (dp->plat_data->get_modes)
-		num_modes += dp->plat_data->get_modes(dp->plat_data);
+		num_modes += dp->plat_data->get_modes(dp->plat_data, connector);
 
 	return num_modes;
 }
@@ -1208,6 +1208,7 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp)
 
 	switch (dp->plat_data->dev_type) {
 	case RK3288_DP:
+	case RK3399_EDP:
 		/*
 		 * Like Rk3288 DisplayPort TRM indicate that "Main link
 		 * containing 4 physical lanes of 2.7/1.62 Gbps/lane".
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
index f09275d40f70..b45638043ec4 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
@@ -127,10 +127,10 @@ enum analog_power_block {
 };
 
 enum dp_irq_type {
-	DP_IRQ_TYPE_HP_CABLE_IN,
-	DP_IRQ_TYPE_HP_CABLE_OUT,
-	DP_IRQ_TYPE_HP_CHANGE,
-	DP_IRQ_TYPE_UNKNOWN,
+	DP_IRQ_TYPE_HP_CABLE_IN  = BIT(0),
+	DP_IRQ_TYPE_HP_CABLE_OUT = BIT(1),
+	DP_IRQ_TYPE_HP_CHANGE    = BIT(2),
+	DP_IRQ_TYPE_UNKNOWN      = BIT(3),
 };
 
 struct video_info {
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
index 49205ef02be3..48030f0cf497 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
@@ -74,8 +74,12 @@ void analogix_dp_init_analog_param(struct analogix_dp_device *dp)
 	reg = SEL_24M | TX_DVDD_BIT_1_0625V;
 	writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_2);
 
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP)) {
-		writel(REF_CLK_24M, dp->reg_base + ANALOGIX_DP_PLL_REG_1);
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) {
+		reg = REF_CLK_24M;
+		if (dp->plat_data->dev_type == RK3288_DP)
+			reg ^= REF_CLK_MASK;
+
+		writel(reg, dp->reg_base + ANALOGIX_DP_PLL_REG_1);
 		writel(0x95, dp->reg_base + ANALOGIX_DP_PLL_REG_2);
 		writel(0x40, dp->reg_base + ANALOGIX_DP_PLL_REG_3);
 		writel(0x58, dp->reg_base + ANALOGIX_DP_PLL_REG_4);
@@ -244,7 +248,7 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp,
 	u32 reg;
 	u32 phy_pd_addr = ANALOGIX_DP_PHY_PD;
 
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP))
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
 		phy_pd_addr = ANALOGIX_DP_PD;
 
 	switch (block) {
@@ -448,7 +452,7 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp)
 	analogix_dp_reset_aux(dp);
 
 	/* Disable AUX transaction H/W retry */
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP))
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
 		reg = AUX_BIT_PERIOD_EXPECTED_DELAY(0) |
 		      AUX_HW_RETRY_COUNT_SEL(3) |
 		      AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
index 337912b0aeab..cdcc6c5add5e 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
@@ -163,8 +163,9 @@
 #define HSYNC_POLARITY_CFG			(0x1 << 0)
 
 /* ANALOGIX_DP_PLL_REG_1 */
-#define REF_CLK_24M				(0x1 << 1)
-#define REF_CLK_27M				(0x0 << 1)
+#define REF_CLK_24M				(0x1 << 0)
+#define REF_CLK_27M				(0x0 << 0)
+#define REF_CLK_MASK				(0x1 << 0)
 
 /* ANALOGIX_DP_LANE_MAP */
 #define LANE3_MAP_LOGIC_LANE_0			(0x0 << 6)
diff --git a/drivers/gpu/drm/bridge/dw-hdmi.c b/drivers/gpu/drm/bridge/dw-hdmi.c
index 70b1f7d4270b..77ab47341658 100644
--- a/drivers/gpu/drm/bridge/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/dw-hdmi.c
@@ -1495,14 +1495,6 @@ static void dw_hdmi_connector_force(struct drm_connector *connector)
 }
 
 static const struct drm_connector_funcs dw_hdmi_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.detect = dw_hdmi_connector_detect,
-	.destroy = dw_hdmi_connector_destroy,
-	.force = dw_hdmi_connector_force,
-};
-
-static const struct drm_connector_funcs dw_hdmi_atomic_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = dw_hdmi_connector_detect,
@@ -1634,14 +1626,9 @@ static int dw_hdmi_register(struct drm_device *drm, struct dw_hdmi *hdmi)
 	drm_connector_helper_add(&hdmi->connector,
 				 &dw_hdmi_connector_helper_funcs);
 
-	if (drm_core_check_feature(drm, DRIVER_ATOMIC))
-		drm_connector_init(drm, &hdmi->connector,
-				   &dw_hdmi_atomic_connector_funcs,
-				   DRM_MODE_CONNECTOR_HDMIA);
-	else
-		drm_connector_init(drm, &hdmi->connector,
-				   &dw_hdmi_connector_funcs,
-				   DRM_MODE_CONNECTOR_HDMIA);
+	drm_connector_init(drm, &hdmi->connector,
+			   &dw_hdmi_connector_funcs,
+			   DRM_MODE_CONNECTOR_HDMIA);
 
 	drm_mode_connector_attach_encoder(&hdmi->connector, encoder);
 
diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c
index 5cd8dd7e5904..583b8ce614e3 100644
--- a/drivers/gpu/drm/bridge/parade-ps8622.c
+++ b/drivers/gpu/drm/bridge/parade-ps8622.c
@@ -636,9 +636,7 @@ static int ps8622_remove(struct i2c_client *client)
 {
 	struct ps8622_bridge *ps8622 = i2c_get_clientdata(client);
 
-	if (ps8622->bl)
-		backlight_device_unregister(ps8622->bl);
-
+	backlight_device_unregister(ps8622->bl);
 	drm_bridge_remove(&ps8622->bridge);
 
 	return 0;
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
new file mode 100644
index 000000000000..a09825d8c94a
--- /dev/null
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -0,0 +1,1413 @@
+/*
+ * tc358767 eDP bridge driver
+ *
+ * Copyright (C) 2016 CogentEmbedded Inc
+ * Author: Andrey Gusakov <andrey.gusakov@cogentembedded.com>
+ *
+ * Copyright (C) 2016 Pengutronix, Philipp Zabel <p.zabel@pengutronix.de>
+ *
+ * Initially based on: drivers/gpu/drm/i2c/tda998x_drv.c
+ *
+ * Copyright (C) 2012 Texas Instruments
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+
+/* Registers */
+
+/* Display Parallel Interface */
+#define DPIPXLFMT		0x0440
+#define VS_POL_ACTIVE_LOW		(1 << 10)
+#define HS_POL_ACTIVE_LOW		(1 << 9)
+#define DE_POL_ACTIVE_HIGH		(0 << 8)
+#define SUB_CFG_TYPE_CONFIG1		(0 << 2) /* LSB aligned */
+#define SUB_CFG_TYPE_CONFIG2		(1 << 2) /* Loosely Packed */
+#define SUB_CFG_TYPE_CONFIG3		(2 << 2) /* LSB aligned 8-bit */
+#define DPI_BPP_RGB888			(0 << 0)
+#define DPI_BPP_RGB666			(1 << 0)
+#define DPI_BPP_RGB565			(2 << 0)
+
+/* Video Path */
+#define VPCTRL0			0x0450
+#define OPXLFMT_RGB666			(0 << 8)
+#define OPXLFMT_RGB888			(1 << 8)
+#define FRMSYNC_DISABLED		(0 << 4) /* Video Timing Gen Disabled */
+#define FRMSYNC_ENABLED			(1 << 4) /* Video Timing Gen Enabled */
+#define MSF_DISABLED			(0 << 0) /* Magic Square FRC disabled */
+#define MSF_ENABLED			(1 << 0) /* Magic Square FRC enabled */
+#define HTIM01			0x0454
+#define HTIM02			0x0458
+#define VTIM01			0x045c
+#define VTIM02			0x0460
+#define VFUEN0			0x0464
+#define VFUEN				BIT(0)   /* Video Frame Timing Upload */
+
+/* System */
+#define TC_IDREG		0x0500
+#define SYSCTRL			0x0510
+#define DP0_AUDSRC_NO_INPUT		(0 << 3)
+#define DP0_AUDSRC_I2S_RX		(1 << 3)
+#define DP0_VIDSRC_NO_INPUT		(0 << 0)
+#define DP0_VIDSRC_DSI_RX		(1 << 0)
+#define DP0_VIDSRC_DPI_RX		(2 << 0)
+#define DP0_VIDSRC_COLOR_BAR		(3 << 0)
+
+/* Control */
+#define DP0CTL			0x0600
+#define VID_MN_GEN			BIT(6)   /* Auto-generate M/N values */
+#define EF_EN				BIT(5)   /* Enable Enhanced Framing */
+#define VID_EN				BIT(1)   /* Video transmission enable */
+#define DP_EN				BIT(0)   /* Enable DPTX function */
+
+/* Clocks */
+#define DP0_VIDMNGEN0		0x0610
+#define DP0_VIDMNGEN1		0x0614
+#define DP0_VMNGENSTATUS	0x0618
+
+/* Main Channel */
+#define DP0_SECSAMPLE		0x0640
+#define DP0_VIDSYNCDELAY	0x0644
+#define DP0_TOTALVAL		0x0648
+#define DP0_STARTVAL		0x064c
+#define DP0_ACTIVEVAL		0x0650
+#define DP0_SYNCVAL		0x0654
+#define DP0_MISC		0x0658
+#define TU_SIZE_RECOMMENDED		(0x3f << 16) /* LSCLK cycles per TU */
+#define BPC_6				(0 << 5)
+#define BPC_8				(1 << 5)
+
+/* AUX channel */
+#define DP0_AUXCFG0		0x0660
+#define DP0_AUXCFG1		0x0664
+#define AUX_RX_FILTER_EN		BIT(16)
+
+#define DP0_AUXADDR		0x0668
+#define DP0_AUXWDATA(i)		(0x066c + (i) * 4)
+#define DP0_AUXRDATA(i)		(0x067c + (i) * 4)
+#define DP0_AUXSTATUS		0x068c
+#define AUX_STATUS_MASK			0xf0
+#define AUX_STATUS_SHIFT		4
+#define AUX_TIMEOUT			BIT(1)
+#define AUX_BUSY			BIT(0)
+#define DP0_AUXI2CADR		0x0698
+
+/* Link Training */
+#define DP0_SRCCTRL		0x06a0
+#define DP0_SRCCTRL_SCRMBLDIS		BIT(13)
+#define DP0_SRCCTRL_EN810B		BIT(12)
+#define DP0_SRCCTRL_NOTP		(0 << 8)
+#define DP0_SRCCTRL_TP1			(1 << 8)
+#define DP0_SRCCTRL_TP2			(2 << 8)
+#define DP0_SRCCTRL_LANESKEW		BIT(7)
+#define DP0_SRCCTRL_SSCG		BIT(3)
+#define DP0_SRCCTRL_LANES_1		(0 << 2)
+#define DP0_SRCCTRL_LANES_2		(1 << 2)
+#define DP0_SRCCTRL_BW27		(1 << 1)
+#define DP0_SRCCTRL_BW162		(0 << 1)
+#define DP0_SRCCTRL_AUTOCORRECT		BIT(0)
+#define DP0_LTSTAT		0x06d0
+#define LT_LOOPDONE			BIT(13)
+#define LT_STATUS_MASK			(0x1f << 8)
+#define LT_CHANNEL1_EQ_BITS		(DP_CHANNEL_EQ_BITS << 4)
+#define LT_INTERLANE_ALIGN_DONE		BIT(3)
+#define LT_CHANNEL0_EQ_BITS		(DP_CHANNEL_EQ_BITS)
+#define DP0_SNKLTCHGREQ		0x06d4
+#define DP0_LTLOOPCTRL		0x06d8
+#define DP0_SNKLTCTRL		0x06e4
+
+/* PHY */
+#define DP_PHY_CTRL		0x0800
+#define DP_PHY_RST			BIT(28)  /* DP PHY Global Soft Reset */
+#define BGREN				BIT(25)  /* AUX PHY BGR Enable */
+#define PWR_SW_EN			BIT(24)  /* PHY Power Switch Enable */
+#define PHY_M1_RST			BIT(12)  /* Reset PHY1 Main Channel */
+#define PHY_RDY				BIT(16)  /* PHY Main Channels Ready */
+#define PHY_M0_RST			BIT(8)   /* Reset PHY0 Main Channel */
+#define PHY_A0_EN			BIT(1)   /* PHY Aux Channel0 Enable */
+#define PHY_M0_EN			BIT(0)   /* PHY Main Channel0 Enable */
+
+/* PLL */
+#define DP0_PLLCTRL		0x0900
+#define DP1_PLLCTRL		0x0904	/* not defined in DS */
+#define PXL_PLLCTRL		0x0908
+#define PLLUPDATE			BIT(2)
+#define PLLBYP				BIT(1)
+#define PLLEN				BIT(0)
+#define PXL_PLLPARAM		0x0914
+#define IN_SEL_REFCLK			(0 << 14)
+#define SYS_PLLPARAM		0x0918
+#define REF_FREQ_38M4			(0 << 8) /* 38.4 MHz */
+#define REF_FREQ_19M2			(1 << 8) /* 19.2 MHz */
+#define REF_FREQ_26M			(2 << 8) /* 26 MHz */
+#define REF_FREQ_13M			(3 << 8) /* 13 MHz */
+#define SYSCLK_SEL_LSCLK		(0 << 4)
+#define LSCLK_DIV_1			(0 << 0)
+#define LSCLK_DIV_2			(1 << 0)
+
+/* Test & Debug */
+#define TSTCTL			0x0a00
+#define PLL_DBG			0x0a04
+
+static bool tc_test_pattern;
+module_param_named(test, tc_test_pattern, bool, 0644);
+
+struct tc_edp_link {
+	struct drm_dp_link	base;
+	u8			assr;
+	int			scrambler_dis;
+	int			spread;
+	int			coding8b10b;
+	u8			swing;
+	u8			preemp;
+};
+
+struct tc_data {
+	struct device		*dev;
+	struct regmap		*regmap;
+	struct drm_dp_aux	aux;
+
+	struct drm_bridge	bridge;
+	struct drm_connector	connector;
+	struct drm_panel	*panel;
+
+	/* link settings */
+	struct tc_edp_link	link;
+
+	/* display edid */
+	struct edid		*edid;
+	/* current mode */
+	struct drm_display_mode	*mode;
+
+	u32			rev;
+	u8			assr;
+
+	struct gpio_desc	*sd_gpio;
+	struct gpio_desc	*reset_gpio;
+	struct clk		*refclk;
+};
+
+static inline struct tc_data *aux_to_tc(struct drm_dp_aux *a)
+{
+	return container_of(a, struct tc_data, aux);
+}
+
+static inline struct tc_data *bridge_to_tc(struct drm_bridge *b)
+{
+	return container_of(b, struct tc_data, bridge);
+}
+
+static inline struct tc_data *connector_to_tc(struct drm_connector *c)
+{
+	return container_of(c, struct tc_data, connector);
+}
+
+/* Simple macros to avoid repeated error checks */
+#define tc_write(reg, var)					\
+	do {							\
+		ret = regmap_write(tc->regmap, reg, var);	\
+		if (ret)					\
+			goto err;				\
+	} while (0)
+#define tc_read(reg, var)					\
+	do {							\
+		ret = regmap_read(tc->regmap, reg, var);	\
+		if (ret)					\
+			goto err;				\
+	} while (0)
+
+static inline int tc_poll_timeout(struct regmap *map, unsigned int addr,
+				  unsigned int cond_mask,
+				  unsigned int cond_value,
+				  unsigned long sleep_us, u64 timeout_us)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+	unsigned int val;
+	int ret;
+
+	for (;;) {
+		ret = regmap_read(map, addr, &val);
+		if (ret)
+			break;
+		if ((val & cond_mask) == cond_value)
+			break;
+		if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) {
+			ret = regmap_read(map, addr, &val);
+			break;
+		}
+		if (sleep_us)
+			usleep_range((sleep_us >> 2) + 1, sleep_us);
+	}
+	return ret ?: (((val & cond_mask) == cond_value) ? 0 : -ETIMEDOUT);
+}
+
+static int tc_aux_wait_busy(struct tc_data *tc, unsigned int timeout_ms)
+{
+	return tc_poll_timeout(tc->regmap, DP0_AUXSTATUS, AUX_BUSY, 0,
+			       1000, 1000 * timeout_ms);
+}
+
+static int tc_aux_get_status(struct tc_data *tc, u8 *reply)
+{
+	int ret;
+	u32 value;
+
+	ret = regmap_read(tc->regmap, DP0_AUXSTATUS, &value);
+	if (ret < 0)
+		return ret;
+	if (value & AUX_BUSY) {
+		if (value & AUX_TIMEOUT) {
+			dev_err(tc->dev, "i2c access timeout!\n");
+			return -ETIMEDOUT;
+		}
+		return -EBUSY;
+	}
+
+	*reply = (value & AUX_STATUS_MASK) >> AUX_STATUS_SHIFT;
+	return 0;
+}
+
+static ssize_t tc_aux_transfer(struct drm_dp_aux *aux,
+			       struct drm_dp_aux_msg *msg)
+{
+	struct tc_data *tc = aux_to_tc(aux);
+	size_t size = min_t(size_t, 8, msg->size);
+	u8 request = msg->request & ~DP_AUX_I2C_MOT;
+	u8 *buf = msg->buffer;
+	u32 tmp = 0;
+	int i = 0;
+	int ret;
+
+	if (size == 0)
+		return 0;
+
+	ret = tc_aux_wait_busy(tc, 100);
+	if (ret)
+		goto err;
+
+	if (request == DP_AUX_I2C_WRITE || request == DP_AUX_NATIVE_WRITE) {
+		/* Store data */
+		while (i < size) {
+			if (request == DP_AUX_NATIVE_WRITE)
+				tmp = tmp | (buf[i] << (8 * (i & 0x3)));
+			else
+				tmp = (tmp << 8) | buf[i];
+			i++;
+			if (((i % 4) == 0) || (i == size)) {
+				tc_write(DP0_AUXWDATA(i >> 2), tmp);
+				tmp = 0;
+			}
+		}
+	} else if (request != DP_AUX_I2C_READ &&
+		   request != DP_AUX_NATIVE_READ) {
+		return -EINVAL;
+	}
+
+	/* Store address */
+	tc_write(DP0_AUXADDR, msg->address);
+	/* Start transfer */
+	tc_write(DP0_AUXCFG0, ((size - 1) << 8) | request);
+
+	ret = tc_aux_wait_busy(tc, 100);
+	if (ret)
+		goto err;
+
+	ret = tc_aux_get_status(tc, &msg->reply);
+	if (ret)
+		goto err;
+
+	if (request == DP_AUX_I2C_READ || request == DP_AUX_NATIVE_READ) {
+		/* Read data */
+		while (i < size) {
+			if ((i % 4) == 0)
+				tc_read(DP0_AUXRDATA(i >> 2), &tmp);
+			buf[i] = tmp & 0xff;
+			tmp = tmp >> 8;
+			i++;
+		}
+	}
+
+	return size;
+err:
+	return ret;
+}
+
+static const char * const training_pattern1_errors[] = {
+	"No errors",
+	"Aux write error",
+	"Aux read error",
+	"Max voltage reached error",
+	"Loop counter expired error",
+	"res", "res", "res"
+};
+
+static const char * const training_pattern2_errors[] = {
+	"No errors",
+	"Aux write error",
+	"Aux read error",
+	"Clock recovery failed error",
+	"Loop counter expired error",
+	"res", "res", "res"
+};
+
+static u32 tc_srcctrl(struct tc_data *tc)
+{
+	/*
+	 * No training pattern, skew lane 1 data by two LSCLK cycles with
+	 * respect to lane 0 data, AutoCorrect Mode = 0
+	 */
+	u32 reg = DP0_SRCCTRL_NOTP | DP0_SRCCTRL_LANESKEW;
+
+	if (tc->link.scrambler_dis)
+		reg |= DP0_SRCCTRL_SCRMBLDIS;	/* Scrambler Disabled */
+	if (tc->link.coding8b10b)
+		/* Enable 8/10B Encoder (TxData[19:16] not used) */
+		reg |= DP0_SRCCTRL_EN810B;
+	if (tc->link.spread)
+		reg |= DP0_SRCCTRL_SSCG;	/* Spread Spectrum Enable */
+	if (tc->link.base.num_lanes == 2)
+		reg |= DP0_SRCCTRL_LANES_2;	/* Two Main Channel Lanes */
+	if (tc->link.base.rate != 162000)
+		reg |= DP0_SRCCTRL_BW27;	/* 2.7 Gbps link */
+	return reg;
+}
+
+static void tc_wait_pll_lock(struct tc_data *tc)
+{
+	/* Wait for PLL to lock: up to 2.09 ms, depending on refclk */
+	usleep_range(3000, 6000);
+}
+
+static int tc_pxl_pll_en(struct tc_data *tc, u32 refclk, u32 pixelclock)
+{
+	int ret;
+	int i_pre, best_pre = 1;
+	int i_post, best_post = 1;
+	int div, best_div = 1;
+	int mul, best_mul = 1;
+	int delta, best_delta;
+	int ext_div[] = {1, 2, 3, 5, 7};
+	int best_pixelclock = 0;
+	int vco_hi = 0;
+
+	dev_dbg(tc->dev, "PLL: requested %d pixelclock, ref %d\n", pixelclock,
+		refclk);
+	best_delta = pixelclock;
+	/* Loop over all possible ext_divs, skipping invalid configurations */
+	for (i_pre = 0; i_pre < ARRAY_SIZE(ext_div); i_pre++) {
+		/*
+		 * refclk / ext_pre_div should be in the 1 to 200 MHz range.
+		 * We don't allow any refclk > 200 MHz, only check lower bounds.
+		 */
+		if (refclk / ext_div[i_pre] < 1000000)
+			continue;
+		for (i_post = 0; i_post < ARRAY_SIZE(ext_div); i_post++) {
+			for (div = 1; div <= 16; div++) {
+				u32 clk;
+				u64 tmp;
+
+				tmp = pixelclock * ext_div[i_pre] *
+				      ext_div[i_post] * div;
+				do_div(tmp, refclk);
+				mul = tmp;
+
+				/* Check limits */
+				if ((mul < 1) || (mul > 128))
+					continue;
+
+				clk = (refclk / ext_div[i_pre] / div) * mul;
+				/*
+				 * refclk * mul / (ext_pre_div * pre_div)
+				 * should be in the 150 to 650 MHz range
+				 */
+				if ((clk > 650000000) || (clk < 150000000))
+					continue;
+
+				clk = clk / ext_div[i_post];
+				delta = clk - pixelclock;
+
+				if (abs(delta) < abs(best_delta)) {
+					best_pre = i_pre;
+					best_post = i_post;
+					best_div = div;
+					best_mul = mul;
+					best_delta = delta;
+					best_pixelclock = clk;
+				}
+			}
+		}
+	}
+	if (best_pixelclock == 0) {
+		dev_err(tc->dev, "Failed to calc clock for %d pixelclock\n",
+			pixelclock);
+		return -EINVAL;
+	}
+
+	dev_dbg(tc->dev, "PLL: got %d, delta %d\n", best_pixelclock,
+		best_delta);
+	dev_dbg(tc->dev, "PLL: %d / %d / %d * %d / %d\n", refclk,
+		ext_div[best_pre], best_div, best_mul, ext_div[best_post]);
+
+	/* if VCO >= 300 MHz */
+	if (refclk / ext_div[best_pre] / best_div * best_mul >= 300000000)
+		vco_hi = 1;
+	/* see DS */
+	if (best_div == 16)
+		best_div = 0;
+	if (best_mul == 128)
+		best_mul = 0;
+
+	/* Power up PLL and switch to bypass */
+	tc_write(PXL_PLLCTRL, PLLBYP | PLLEN);
+
+	tc_write(PXL_PLLPARAM,
+		 (vco_hi << 24) |		/* For PLL VCO >= 300 MHz = 1 */
+		 (ext_div[best_pre] << 20) |	/* External Pre-divider */
+		 (ext_div[best_post] << 16) |	/* External Post-divider */
+		 IN_SEL_REFCLK |		/* Use RefClk as PLL input */
+		 (best_div << 8) |		/* Divider for PLL RefClk */
+		 (best_mul << 0));		/* Multiplier for PLL */
+
+	/* Force PLL parameter update and disable bypass */
+	tc_write(PXL_PLLCTRL, PLLUPDATE | PLLEN);
+
+	tc_wait_pll_lock(tc);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_pxl_pll_dis(struct tc_data *tc)
+{
+	/* Enable PLL bypass, power down PLL */
+	return regmap_write(tc->regmap, PXL_PLLCTRL, PLLBYP);
+}
+
+static int tc_stream_clock_calc(struct tc_data *tc)
+{
+	int ret;
+	/*
+	 * If the Stream clock and Link Symbol clock are
+	 * asynchronous with each other, the value of M changes over
+	 * time. This way of generating link clock and stream
+	 * clock is called Asynchronous Clock mode. The value M
+	 * must change while the value N stays constant. The
+	 * value of N in this Asynchronous Clock mode must be set
+	 * to 2^15 or 32,768.
+	 *
+	 * LSCLK = 1/10 of high speed link clock
+	 *
+	 * f_STRMCLK = M/N * f_LSCLK
+	 * M/N = f_STRMCLK / f_LSCLK
+	 *
+	 */
+	tc_write(DP0_VIDMNGEN1, 32768);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_aux_link_setup(struct tc_data *tc)
+{
+	unsigned long rate;
+	u32 value;
+	int ret;
+
+	rate = clk_get_rate(tc->refclk);
+	switch (rate) {
+	case 38400000:
+		value = REF_FREQ_38M4;
+		break;
+	case 26000000:
+		value = REF_FREQ_26M;
+		break;
+	case 19200000:
+		value = REF_FREQ_19M2;
+		break;
+	case 13000000:
+		value = REF_FREQ_13M;
+		break;
+	default:
+		dev_err(tc->dev, "Invalid refclk rate: %lu Hz\n", rate);
+		return -EINVAL;
+	}
+
+	/* Setup DP-PHY / PLL */
+	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
+	tc_write(SYS_PLLPARAM, value);
+
+	tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN);
+
+	/*
+	 * Initially PLLs are in bypass. Force PLL parameter update,
+	 * disable PLL bypass, enable PLL
+	 */
+	tc_write(DP0_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	tc_write(DP1_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	ret = tc_poll_timeout(tc->regmap, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1,
+			      1000);
+	if (ret == -ETIMEDOUT) {
+		dev_err(tc->dev, "Timeout waiting for PHY to become ready");
+		return ret;
+	} else if (ret)
+		goto err;
+
+	/* Setup AUX link */
+	tc_write(DP0_AUXCFG1, AUX_RX_FILTER_EN |
+		 (0x06 << 8) |	/* Aux Bit Period Calculator Threshold */
+		 (0x3f << 0));	/* Aux Response Timeout Timer */
+
+	return 0;
+err:
+	dev_err(tc->dev, "tc_aux_link_setup failed: %d\n", ret);
+	return ret;
+}
+
+static int tc_get_display_props(struct tc_data *tc)
+{
+	int ret;
+	/* temp buffer */
+	u8 tmp[8];
+
+	/* Read DP Rx Link Capability */
+	ret = drm_dp_link_probe(&tc->aux, &tc->link.base);
+	if (ret < 0)
+		goto err_dpcd_read;
+	if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000))
+		goto err_dpcd_inval;
+
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.spread = tmp[0] & BIT(0); /* 0.5% down spread */
+
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAIN_LINK_CHANNEL_CODING, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.coding8b10b = tmp[0] & BIT(0);
+	tc->link.scrambler_dis = 0;
+	/* read assr */
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_EDP_CONFIGURATION_SET, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.assr = tmp[0] & DP_ALTERNATE_SCRAMBLER_RESET_ENABLE;
+
+	dev_dbg(tc->dev, "DPCD rev: %d.%d, rate: %s, lanes: %d, framing: %s\n",
+		tc->link.base.revision >> 4, tc->link.base.revision & 0x0f,
+		(tc->link.base.rate == 162000) ? "1.62Gbps" : "2.7Gbps",
+		tc->link.base.num_lanes,
+		(tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) ?
+		"enhanced" : "non-enhanced");
+	dev_dbg(tc->dev, "ANSI 8B/10B: %d\n", tc->link.coding8b10b);
+	dev_dbg(tc->dev, "Display ASSR: %d, TC358767 ASSR: %d\n",
+		tc->link.assr, tc->assr);
+
+	return 0;
+
+err_dpcd_read:
+	dev_err(tc->dev, "failed to read DPCD: %d\n", ret);
+	return ret;
+err_dpcd_inval:
+	dev_err(tc->dev, "invalid DPCD\n");
+	return -EINVAL;
+}
+
+static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode)
+{
+	int ret;
+	int vid_sync_dly;
+	int max_tu_symbol;
+
+	int left_margin = mode->htotal - mode->hsync_end;
+	int right_margin = mode->hsync_start - mode->hdisplay;
+	int hsync_len = mode->hsync_end - mode->hsync_start;
+	int upper_margin = mode->vtotal - mode->vsync_end;
+	int lower_margin = mode->vsync_start - mode->vdisplay;
+	int vsync_len = mode->vsync_end - mode->vsync_start;
+
+	dev_dbg(tc->dev, "set mode %dx%d\n",
+		mode->hdisplay, mode->vdisplay);
+	dev_dbg(tc->dev, "H margin %d,%d sync %d\n",
+		left_margin, right_margin, hsync_len);
+	dev_dbg(tc->dev, "V margin %d,%d sync %d\n",
+		upper_margin, lower_margin, vsync_len);
+	dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal);
+
+
+	/* LCD Ctl Frame Size */
+	tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ |
+		 OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED);
+	tc_write(HTIM01, (left_margin << 16) |		/* H back porch */
+			 (hsync_len << 0));		/* Hsync */
+	tc_write(HTIM02, (right_margin << 16) |		/* H front porch */
+			 (mode->hdisplay << 0));	/* width */
+	tc_write(VTIM01, (upper_margin << 16) |		/* V back porch */
+			 (vsync_len << 0));		/* Vsync */
+	tc_write(VTIM02, (lower_margin << 16) |		/* V front porch */
+			 (mode->vdisplay << 0));	/* height */
+	tc_write(VFUEN0, VFUEN);		/* update settings */
+
+	/* Test pattern settings */
+	tc_write(TSTCTL,
+		 (120 << 24) |	/* Red Color component value */
+		 (20 << 16) |	/* Green Color component value */
+		 (99 << 8) |	/* Blue Color component value */
+		 (1 << 4) |	/* Enable I2C Filter */
+		 (2 << 0) |	/* Color bar Mode */
+		 0);
+
+	/* DP Main Stream Attributes */
+	vid_sync_dly = hsync_len + left_margin + mode->hdisplay;
+	tc_write(DP0_VIDSYNCDELAY,
+		 (0x003e << 16) |	/* thresh_dly */
+		 (vid_sync_dly << 0));
+
+	tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal));
+
+	tc_write(DP0_STARTVAL,
+		 ((upper_margin + vsync_len) << 16) |
+		 ((left_margin + hsync_len) << 0));
+
+	tc_write(DP0_ACTIVEVAL, (mode->vdisplay << 16) | (mode->hdisplay));
+
+	tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0));
+
+	tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW |
+		 DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888);
+
+	/*
+	 * Recommended maximum number of symbols transferred in a transfer unit:
+	 * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size,
+	 *              (output active video bandwidth in bytes))
+	 * Must be less than tu_size.
+	 */
+	max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+	tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_link_training(struct tc_data *tc, int pattern)
+{
+	const char * const *errors;
+	u32 srcctrl = tc_srcctrl(tc) | DP0_SRCCTRL_SCRMBLDIS |
+		      DP0_SRCCTRL_AUTOCORRECT;
+	int timeout;
+	int retry;
+	u32 value;
+	int ret;
+
+	if (pattern == DP_TRAINING_PATTERN_1) {
+		srcctrl |= DP0_SRCCTRL_TP1;
+		errors = training_pattern1_errors;
+	} else {
+		srcctrl |= DP0_SRCCTRL_TP2;
+		errors = training_pattern2_errors;
+	}
+
+	/* Set DPCD 0x102 for Training Part 1 or 2 */
+	tc_write(DP0_SNKLTCTRL, DP_LINK_SCRAMBLING_DISABLE | pattern);
+
+	tc_write(DP0_LTLOOPCTRL,
+		 (0x0f << 28) |	/* Defer Iteration Count */
+		 (0x0f << 24) |	/* Loop Iteration Count */
+		 (0x0d << 0));	/* Loop Timer Delay */
+
+	retry = 5;
+	do {
+		/* Set DP0 Training Pattern */
+		tc_write(DP0_SRCCTRL, srcctrl);
+
+		/* Enable DP0 to start Link Training */
+		tc_write(DP0CTL, DP_EN);
+
+		/* wait */
+		timeout = 1000;
+		do {
+			tc_read(DP0_LTSTAT, &value);
+			udelay(1);
+		} while ((!(value & LT_LOOPDONE)) && (--timeout));
+		if (timeout == 0) {
+			dev_err(tc->dev, "Link training timeout!\n");
+		} else {
+			int pattern = (value >> 11) & 0x3;
+			int error = (value >> 8) & 0x7;
+
+			dev_dbg(tc->dev,
+				"Link training phase %d done after %d uS: %s\n",
+				pattern, 1000 - timeout, errors[error]);
+			if (pattern == DP_TRAINING_PATTERN_1 && error == 0)
+				break;
+			if (pattern == DP_TRAINING_PATTERN_2) {
+				value &= LT_CHANNEL1_EQ_BITS |
+					 LT_INTERLANE_ALIGN_DONE |
+					 LT_CHANNEL0_EQ_BITS;
+				/* in case of two lanes */
+				if ((tc->link.base.num_lanes == 2) &&
+				    (value == (LT_CHANNEL1_EQ_BITS |
+					       LT_INTERLANE_ALIGN_DONE |
+					       LT_CHANNEL0_EQ_BITS)))
+					break;
+				/* in case of one line */
+				if ((tc->link.base.num_lanes == 1) &&
+				    (value == (LT_INTERLANE_ALIGN_DONE |
+					       LT_CHANNEL0_EQ_BITS)))
+					break;
+			}
+		}
+		/* restart */
+		tc_write(DP0CTL, 0);
+		usleep_range(10, 20);
+	} while (--retry);
+	if (retry == 0) {
+		dev_err(tc->dev, "Failed to finish training phase %d\n",
+			pattern);
+	}
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_main_link_setup(struct tc_data *tc)
+{
+	struct drm_dp_aux *aux = &tc->aux;
+	struct device *dev = tc->dev;
+	unsigned int rate;
+	u32 dp_phy_ctrl;
+	int timeout;
+	bool aligned;
+	bool ready;
+	u32 value;
+	int ret;
+	u8 tmp[8];
+
+	/* display mode should be set at this point */
+	if (!tc->mode)
+		return -EINVAL;
+
+	/* from excel file - DP0_SrcCtrl */
+	tc_write(DP0_SRCCTRL, DP0_SRCCTRL_SCRMBLDIS | DP0_SRCCTRL_EN810B |
+		 DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 |
+		 DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT);
+	/* from excel file - DP1_SrcCtrl */
+	tc_write(0x07a0, 0x00003083);
+
+	rate = clk_get_rate(tc->refclk);
+	switch (rate) {
+	case 38400000:
+		value = REF_FREQ_38M4;
+		break;
+	case 26000000:
+		value = REF_FREQ_26M;
+		break;
+	case 19200000:
+		value = REF_FREQ_19M2;
+		break;
+	case 13000000:
+		value = REF_FREQ_13M;
+		break;
+	default:
+		return -EINVAL;
+	}
+	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
+	tc_write(SYS_PLLPARAM, value);
+	/* Setup Main Link */
+	dp_phy_ctrl = BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN |  PHY_M0_EN;
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+	msleep(100);
+
+	/* PLL setup */
+	tc_write(DP0_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	tc_write(DP1_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	/* PXL PLL setup */
+	if (tc_test_pattern) {
+		ret = tc_pxl_pll_en(tc, clk_get_rate(tc->refclk),
+				    1000 * tc->mode->clock);
+		if (ret)
+			goto err;
+	}
+
+	/* Reset/Enable Main Links */
+	dp_phy_ctrl |= DP_PHY_RST | PHY_M1_RST | PHY_M0_RST;
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+	usleep_range(100, 200);
+	dp_phy_ctrl &= ~(DP_PHY_RST | PHY_M1_RST | PHY_M0_RST);
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+
+	timeout = 1000;
+	do {
+		tc_read(DP_PHY_CTRL, &value);
+		udelay(1);
+	} while ((!(value & PHY_RDY)) && (--timeout));
+
+	if (timeout == 0) {
+		dev_err(dev, "timeout waiting for phy become ready");
+		return -ETIMEDOUT;
+	}
+
+	/* Set misc: 8 bits per color */
+	ret = regmap_update_bits(tc->regmap, DP0_MISC, BPC_8, BPC_8);
+	if (ret)
+		goto err;
+
+	/*
+	 * ASSR mode
+	 * on TC358767 side ASSR configured through strap pin
+	 * seems there is no way to change this setting from SW
+	 *
+	 * check is tc configured for same mode
+	 */
+	if (tc->assr != tc->link.assr) {
+		dev_dbg(dev, "Trying to set display to ASSR: %d\n",
+			tc->assr);
+		/* try to set ASSR on display side */
+		tmp[0] = tc->assr;
+		ret = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, tmp[0]);
+		if (ret < 0)
+			goto err_dpcd_read;
+		/* read back */
+		ret = drm_dp_dpcd_readb(aux, DP_EDP_CONFIGURATION_SET, tmp);
+		if (ret < 0)
+			goto err_dpcd_read;
+
+		if (tmp[0] != tc->assr) {
+			dev_warn(dev, "Failed to switch display ASSR to %d, falling back to unscrambled mode\n",
+				 tc->assr);
+			/* trying with disabled scrambler */
+			tc->link.scrambler_dis = 1;
+		}
+	}
+
+	/* Setup Link & DPRx Config for Training */
+	ret = drm_dp_link_configure(aux, &tc->link.base);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	/* DOWNSPREAD_CTRL */
+	tmp[0] = tc->link.spread ? DP_SPREAD_AMP_0_5 : 0x00;
+	/* MAIN_LINK_CHANNEL_CODING_SET */
+	tmp[1] =  tc->link.coding8b10b ? DP_SET_ANSI_8B10B : 0x00;
+	ret = drm_dp_dpcd_write(aux, DP_DOWNSPREAD_CTRL, tmp, 2);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	ret = tc_link_training(tc, DP_TRAINING_PATTERN_1);
+	if (ret)
+		goto err;
+
+	ret = tc_link_training(tc, DP_TRAINING_PATTERN_2);
+	if (ret)
+		goto err;
+
+	/* Clear DPCD 0x102 */
+	/* Note: Can Not use DP0_SNKLTCTRL (0x06E4) short cut */
+	tmp[0] = tc->link.scrambler_dis ? DP_LINK_SCRAMBLING_DISABLE : 0x00;
+	ret = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, tmp[0]);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	/* Clear Training Pattern, set AutoCorrect Mode = 1 */
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_AUTOCORRECT);
+
+	/* Wait */
+	timeout = 100;
+	do {
+		udelay(1);
+		/* Read DPCD 0x202-0x207 */
+		ret = drm_dp_dpcd_read_link_status(aux, tmp + 2);
+		if (ret < 0)
+			goto err_dpcd_read;
+		ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */
+				     DP_CHANNEL_EQ_BITS));      /* Lane0 */
+		aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE;
+	} while ((--timeout) && !(ready && aligned));
+
+	if (timeout == 0) {
+		/* Read DPCD 0x200-0x201 */
+		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2);
+		if (ret < 0)
+			goto err_dpcd_read;
+		dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]);
+		dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n",
+			 tmp[1]);
+		dev_info(dev, "0x0202 LANE0_1_STATUS: 0x%02x\n", tmp[2]);
+		dev_info(dev, "0x0204 LANE_ALIGN_STATUS_UPDATED: 0x%02x\n",
+			 tmp[4]);
+		dev_info(dev, "0x0205 SINK_STATUS: 0x%02x\n", tmp[5]);
+		dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n",
+			 tmp[6]);
+
+		if (!ready)
+			dev_err(dev, "Lane0/1 not ready\n");
+		if (!aligned)
+			dev_err(dev, "Lane0/1 not aligned\n");
+		return -EAGAIN;
+	}
+
+	ret = tc_set_video_mode(tc, tc->mode);
+	if (ret)
+		goto err;
+
+	/* Set M/N */
+	ret = tc_stream_clock_calc(tc);
+	if (ret)
+		goto err;
+
+	return 0;
+err_dpcd_read:
+	dev_err(tc->dev, "Failed to read DPCD: %d\n", ret);
+	return ret;
+err_dpcd_write:
+	dev_err(tc->dev, "Failed to write DPCD: %d\n", ret);
+err:
+	return ret;
+}
+
+static int tc_main_link_stream(struct tc_data *tc, int state)
+{
+	int ret;
+	u32 value;
+
+	dev_dbg(tc->dev, "stream: %d\n", state);
+
+	if (state) {
+		value = VID_MN_GEN | DP_EN;
+		if (tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+			value |= EF_EN;
+		tc_write(DP0CTL, value);
+		/*
+		 * VID_EN assertion should be delayed by at least N * LSCLK
+		 * cycles from the time VID_MN_GEN is enabled in order to
+		 * generate stable values for VID_M. LSCLK is 270 MHz or
+		 * 162 MHz, VID_N is set to 32768 in  tc_stream_clock_calc(),
+		 * so a delay of at least 203 us should suffice.
+		 */
+		usleep_range(500, 1000);
+		value |= VID_EN;
+		tc_write(DP0CTL, value);
+		/* Set input interface */
+		value = DP0_AUDSRC_NO_INPUT;
+		if (tc_test_pattern)
+			value |= DP0_VIDSRC_COLOR_BAR;
+		else
+			value |= DP0_VIDSRC_DPI_RX;
+		tc_write(SYSCTRL, value);
+	} else {
+		tc_write(DP0CTL, 0);
+	}
+
+	return 0;
+err:
+	return ret;
+}
+
+static enum drm_connector_status
+tc_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static void tc_bridge_pre_enable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	drm_panel_prepare(tc->panel);
+}
+
+static void tc_bridge_enable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+	int ret;
+
+	ret = tc_main_link_setup(tc);
+	if (ret < 0) {
+		dev_err(tc->dev, "main link setup error: %d\n", ret);
+		return;
+	}
+
+	ret = tc_main_link_stream(tc, 1);
+	if (ret < 0) {
+		dev_err(tc->dev, "main link stream start error: %d\n", ret);
+		return;
+	}
+
+	drm_panel_enable(tc->panel);
+}
+
+static void tc_bridge_disable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+	int ret;
+
+	drm_panel_disable(tc->panel);
+
+	ret = tc_main_link_stream(tc, 0);
+	if (ret < 0)
+		dev_err(tc->dev, "main link stream stop error: %d\n", ret);
+}
+
+static void tc_bridge_post_disable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	drm_panel_unprepare(tc->panel);
+}
+
+static bool tc_bridge_mode_fixup(struct drm_bridge *bridge,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adj)
+{
+	/* Fixup sync polarities, both hsync and vsync are active low */
+	adj->flags = mode->flags;
+	adj->flags |= (DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC);
+	adj->flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC);
+
+	return true;
+}
+
+static int tc_connector_mode_valid(struct drm_connector *connector,
+				   struct drm_display_mode *mode)
+{
+	/* Accept any mode */
+	return MODE_OK;
+}
+
+static void tc_bridge_mode_set(struct drm_bridge *bridge,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adj)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	tc->mode = mode;
+}
+
+static int tc_connector_get_modes(struct drm_connector *connector)
+{
+	struct tc_data *tc = connector_to_tc(connector);
+	struct edid *edid;
+	unsigned int count;
+
+	if (tc->panel && tc->panel->funcs && tc->panel->funcs->get_modes) {
+		count = tc->panel->funcs->get_modes(tc->panel);
+		if (count > 0)
+			return count;
+	}
+
+	edid = drm_get_edid(connector, &tc->aux.ddc);
+
+	kfree(tc->edid);
+	tc->edid = edid;
+	if (!edid)
+		return 0;
+
+	drm_mode_connector_update_edid_property(connector, edid);
+	count = drm_add_edid_modes(connector, edid);
+
+	return count;
+}
+
+static void tc_connector_set_polling(struct tc_data *tc,
+				     struct drm_connector *connector)
+{
+	/* TODO: add support for HPD */
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			    DRM_CONNECTOR_POLL_DISCONNECT;
+}
+
+static struct drm_encoder *
+tc_connector_best_encoder(struct drm_connector *connector)
+{
+	struct tc_data *tc = connector_to_tc(connector);
+
+	return tc->bridge.encoder;
+}
+
+static const struct drm_connector_helper_funcs tc_connector_helper_funcs = {
+	.get_modes = tc_connector_get_modes,
+	.mode_valid = tc_connector_mode_valid,
+	.best_encoder = tc_connector_best_encoder,
+};
+
+static void tc_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs tc_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.detect = tc_connector_detect,
+	.destroy = tc_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int tc_bridge_attach(struct drm_bridge *bridge)
+{
+	u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	struct tc_data *tc = bridge_to_tc(bridge);
+	struct drm_device *drm = bridge->dev;
+	int ret;
+
+	/* Create eDP connector */
+	drm_connector_helper_add(&tc->connector, &tc_connector_helper_funcs);
+	ret = drm_connector_init(drm, &tc->connector, &tc_connector_funcs,
+				 DRM_MODE_CONNECTOR_eDP);
+	if (ret)
+		return ret;
+
+	if (tc->panel)
+		drm_panel_attach(tc->panel, &tc->connector);
+
+	drm_display_info_set_bus_formats(&tc->connector.display_info,
+					 &bus_format, 1);
+	drm_mode_connector_attach_encoder(&tc->connector, tc->bridge.encoder);
+
+	return 0;
+}
+
+static const struct drm_bridge_funcs tc_bridge_funcs = {
+	.attach = tc_bridge_attach,
+	.mode_set = tc_bridge_mode_set,
+	.pre_enable = tc_bridge_pre_enable,
+	.enable = tc_bridge_enable,
+	.disable = tc_bridge_disable,
+	.post_disable = tc_bridge_post_disable,
+	.mode_fixup = tc_bridge_mode_fixup,
+};
+
+static bool tc_readable_reg(struct device *dev, unsigned int reg)
+{
+	return reg != SYSCTRL;
+}
+
+static const struct regmap_range tc_volatile_ranges[] = {
+	regmap_reg_range(DP0_AUXWDATA(0), DP0_AUXSTATUS),
+	regmap_reg_range(DP0_LTSTAT, DP0_SNKLTCHGREQ),
+	regmap_reg_range(DP_PHY_CTRL, DP_PHY_CTRL),
+	regmap_reg_range(DP0_PLLCTRL, PXL_PLLCTRL),
+	regmap_reg_range(VFUEN0, VFUEN0),
+};
+
+static const struct regmap_access_table tc_volatile_table = {
+	.yes_ranges = tc_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(tc_volatile_ranges),
+};
+
+static bool tc_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return (reg != TC_IDREG) &&
+	       (reg != DP0_LTSTAT) &&
+	       (reg != DP0_SNKLTCHGREQ);
+}
+
+static const struct regmap_config tc_regmap_config = {
+	.name = "tc358767",
+	.reg_bits = 16,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = PLL_DBG,
+	.cache_type = REGCACHE_RBTREE,
+	.readable_reg = tc_readable_reg,
+	.volatile_table = &tc_volatile_table,
+	.writeable_reg = tc_writeable_reg,
+	.reg_format_endian = REGMAP_ENDIAN_BIG,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct device_node *ep;
+	struct tc_data *tc;
+	int ret;
+
+	tc = devm_kzalloc(dev, sizeof(*tc), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->dev = dev;
+
+	/* port@2 is the output port */
+	ep = of_graph_get_endpoint_by_regs(dev->of_node, 2, -1);
+	if (ep) {
+		struct device_node *remote;
+
+		remote = of_graph_get_remote_port_parent(ep);
+		if (!remote) {
+			dev_warn(dev, "endpoint %s not connected\n",
+				 ep->full_name);
+			of_node_put(ep);
+			return -ENODEV;
+		}
+		of_node_put(ep);
+		tc->panel = of_drm_find_panel(remote);
+		if (tc->panel) {
+			dev_dbg(dev, "found panel %s\n", remote->full_name);
+		} else {
+			dev_dbg(dev, "waiting for panel %s\n",
+				remote->full_name);
+			of_node_put(remote);
+			return -EPROBE_DEFER;
+		}
+		of_node_put(remote);
+	}
+
+	/* Shut down GPIO is optional */
+	tc->sd_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH);
+	if (IS_ERR(tc->sd_gpio))
+		return PTR_ERR(tc->sd_gpio);
+
+	if (tc->sd_gpio) {
+		gpiod_set_value_cansleep(tc->sd_gpio, 0);
+		usleep_range(5000, 10000);
+	}
+
+	/* Reset GPIO is optional */
+	tc->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(tc->reset_gpio))
+		return PTR_ERR(tc->reset_gpio);
+
+	if (tc->reset_gpio) {
+		gpiod_set_value_cansleep(tc->reset_gpio, 1);
+		usleep_range(5000, 10000);
+	}
+
+	tc->refclk = devm_clk_get(dev, "ref");
+	if (IS_ERR(tc->refclk)) {
+		ret = PTR_ERR(tc->refclk);
+		dev_err(dev, "Failed to get refclk: %d\n", ret);
+		return ret;
+	}
+
+	tc->regmap = devm_regmap_init_i2c(client, &tc_regmap_config);
+	if (IS_ERR(tc->regmap)) {
+		ret = PTR_ERR(tc->regmap);
+		dev_err(dev, "Failed to initialize regmap: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(tc->regmap, TC_IDREG, &tc->rev);
+	if (ret) {
+		dev_err(tc->dev, "can not read device ID: %d\n", ret);
+		return ret;
+	}
+
+	if ((tc->rev != 0x6601) && (tc->rev != 0x6603)) {
+		dev_err(tc->dev, "invalid device ID: 0x%08x\n", tc->rev);
+		return -EINVAL;
+	}
+
+	tc->assr = (tc->rev == 0x6601); /* Enable ASSR for eDP panels */
+
+	ret = tc_aux_link_setup(tc);
+	if (ret)
+		return ret;
+
+	/* Register DP AUX channel */
+	tc->aux.name = "TC358767 AUX i2c adapter";
+	tc->aux.dev = tc->dev;
+	tc->aux.transfer = tc_aux_transfer;
+	ret = drm_dp_aux_register(&tc->aux);
+	if (ret)
+		return ret;
+
+	ret = tc_get_display_props(tc);
+	if (ret)
+		goto err_unregister_aux;
+
+	tc_connector_set_polling(tc, &tc->connector);
+
+	tc->bridge.funcs = &tc_bridge_funcs;
+	tc->bridge.of_node = dev->of_node;
+	ret = drm_bridge_add(&tc->bridge);
+	if (ret) {
+		dev_err(dev, "Failed to add drm_bridge: %d\n", ret);
+		goto err_unregister_aux;
+	}
+
+	i2c_set_clientdata(client, tc);
+
+	return 0;
+err_unregister_aux:
+	drm_dp_aux_unregister(&tc->aux);
+	return ret;
+}
+
+static int tc_remove(struct i2c_client *client)
+{
+	struct tc_data *tc = i2c_get_clientdata(client);
+
+	drm_bridge_remove(&tc->bridge);
+	drm_dp_aux_unregister(&tc->aux);
+
+	tc_pxl_pll_dis(tc);
+
+	return 0;
+}
+
+static const struct i2c_device_id tc358767_i2c_ids[] = {
+	{ "tc358767", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tc358767_i2c_ids);
+
+static const struct of_device_id tc358767_of_ids[] = {
+	{ .compatible = "toshiba,tc358767", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tc358767_of_ids);
+
+static struct i2c_driver tc358767_driver = {
+	.driver = {
+		.name = "tc358767",
+		.of_match_table = tc358767_of_ids,
+	},
+	.id_table = tc358767_i2c_ids,
+	.probe = tc_probe,
+	.remove	= tc_remove,
+};
+module_i2c_driver(tc358767_driver);
+
+MODULE_AUTHOR("Andrey Gusakov <andrey.gusakov@cogentembedded.com>");
+MODULE_DESCRIPTION("tc358767 eDP encoder driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/cirrus/Kconfig b/drivers/gpu/drm/cirrus/Kconfig
index 9864559e5fb9..04b3c161dfae 100644
--- a/drivers/gpu/drm/cirrus/Kconfig
+++ b/drivers/gpu/drm/cirrus/Kconfig
@@ -1,11 +1,7 @@
 config DRM_CIRRUS_QEMU
 	tristate "Cirrus driver for QEMU emulated device"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 This is a KMS driver for emulated cirrus device in qemu.
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 32d32c5b7b17..80446e2d3ab6 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -17,8 +17,8 @@
 static void cirrus_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct cirrus_framebuffer *cirrus_fb = to_cirrus_framebuffer(fb);
-	if (cirrus_fb->obj)
-		drm_gem_object_unreference_unlocked(cirrus_fb->obj);
+
+	drm_gem_object_unreference_unlocked(cirrus_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 6768b7b1af32..1cc9ee607128 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -186,17 +186,6 @@ static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
-static int cirrus_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void cirrus_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.ttm_tt_unpopulate = cirrus_ttm_tt_unpopulate,
 	.init_mem_type = cirrus_bo_init_mem_type,
 	.evict_flags = cirrus_bo_evict_flags,
-	.move = cirrus_bo_move,
+	.move = NULL,
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index d99ab2f6663f..8d2f111fa113 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -404,8 +404,7 @@ drm_atomic_replace_property_blob(struct drm_property_blob **blob,
 	if (old_blob == new_blob)
 		return;
 
-	if (old_blob)
-		drm_property_unreference_blob(old_blob);
+	drm_property_unreference_blob(old_blob);
 	if (new_blob)
 		drm_property_reference_blob(new_blob);
 	*blob = new_blob;
@@ -1299,14 +1298,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
  */
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	unsigned crtc_mask = 0;
+	struct drm_crtc *crtc;
 	int ret;
+	bool global = false;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->acquire_ctx != state->acquire_ctx)
+			continue;
+
+		crtc_mask |= drm_crtc_mask(crtc);
+		crtc->acquire_ctx = NULL;
+	}
+
+	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
+		global = true;
+
+		dev->mode_config.acquire_ctx = NULL;
+	}
 
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
+
+	drm_for_each_crtc(crtc, dev)
+		if (drm_crtc_mask(crtc) & crtc_mask)
+			crtc->acquire_ctx = state->acquire_ctx;
+
+	if (global)
+		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
 EXPORT_SYMBOL(drm_atomic_legacy_backoff);
 
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 059f7c39c582..a7916e5f8864 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -136,6 +136,7 @@ drm_clflush_virt_range(void *addr, unsigned long length)
 		mb();
 		for (; addr < end; addr += size)
 			clflushopt(addr);
+		clflushopt(end - 1); /* force serialisation */
 		mb();
 		return;
 	}
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index fd93e9c79d28..f1d9f0569d7f 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -396,6 +396,51 @@ void drm_mode_object_reference(struct drm_mode_object *obj)
 }
 EXPORT_SYMBOL(drm_mode_object_reference);
 
+/**
+ * drm_crtc_force_disable - Forcibly turn off a CRTC
+ * @crtc: CRTC to turn off
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_crtc_force_disable(struct drm_crtc *crtc)
+{
+	struct drm_mode_set set = {
+		.crtc = crtc,
+	};
+
+	return drm_mode_set_config_internal(&set);
+}
+EXPORT_SYMBOL(drm_crtc_force_disable);
+
+/**
+ * drm_crtc_force_disable_all - Forcibly turn off all enabled CRTCs
+ * @dev: DRM device whose CRTCs to turn off
+ *
+ * Drivers may want to call this on unload to ensure that all displays are
+ * unlit and the GPU is in a consistent, low power state. Takes modeset locks.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_crtc_force_disable_all(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	drm_modeset_lock_all(dev);
+	drm_for_each_crtc(crtc, dev)
+		if (crtc->enabled) {
+			ret = drm_crtc_force_disable(crtc);
+			if (ret)
+				goto out;
+		}
+out:
+	drm_modeset_unlock_all(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_force_disable_all);
+
 static void drm_framebuffer_free(struct kref *kref)
 {
 	struct drm_framebuffer *fb =
@@ -544,8 +589,6 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 	struct drm_device *dev;
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
-	struct drm_mode_set set;
-	int ret;
 
 	if (!fb)
 		return;
@@ -575,11 +618,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 		drm_for_each_crtc(crtc, dev) {
 			if (crtc->primary->fb == fb) {
 				/* should turn off the crtc */
-				memset(&set, 0, sizeof(struct drm_mode_set));
-				set.crtc = crtc;
-				set.fb = NULL;
-				ret = drm_mode_set_config_internal(&set);
-				if (ret)
+				if (drm_crtc_force_disable(crtc))
 					DRM_ERROR("failed to reset crtc %p when fb was deleted\n", crtc);
 			}
 		}
@@ -889,11 +928,11 @@ int drm_connector_init(struct drm_device *dev,
 	connector->dev = dev;
 	connector->funcs = funcs;
 
-	connector->connector_id = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
-	if (connector->connector_id < 0) {
-		ret = connector->connector_id;
+	ret = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
 		goto out_put;
-	}
+	connector->index = ret;
+	ret = 0;
 
 	connector->connector_type = connector_type;
 	connector->connector_type_id =
@@ -941,7 +980,7 @@ out_put_type_id:
 		ida_remove(connector_ida, connector->connector_type_id);
 out_put_id:
 	if (ret)
-		ida_remove(&config->connector_ida, connector->connector_id);
+		ida_remove(&config->connector_ida, connector->index);
 out_put:
 	if (ret)
 		drm_mode_object_unregister(dev, &connector->base);
@@ -985,7 +1024,7 @@ void drm_connector_cleanup(struct drm_connector *connector)
 		   connector->connector_type_id);
 
 	ida_remove(&dev->mode_config.connector_ida,
-		   connector->connector_id);
+		   connector->index);
 
 	kfree(connector->display_info.bus_formats);
 	drm_mode_object_unregister(dev, &connector->base);
@@ -1068,23 +1107,16 @@ void drm_connector_unregister(struct drm_connector *connector)
 }
 EXPORT_SYMBOL(drm_connector_unregister);
 
-/**
- * drm_connector_register_all - register all connectors
- * @dev: drm device
- *
- * This function registers all connectors in sysfs and other places so that
- * userspace can start to access them. drm_connector_register_all() is called
- * automatically from drm_dev_register() to complete the device registration,
- * if they don't call drm_connector_register() on each connector individually.
- *
- * When a device is unplugged and should be removed from userspace access,
- * call drm_connector_unregister_all(), which is the inverse of this
- * function.
- *
- * Returns:
- * Zero on success, error code on failure.
- */
-int drm_connector_register_all(struct drm_device *dev)
+static void drm_connector_unregister_all(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+
+	/* FIXME: taking the mode config mutex ends up in a clash with sysfs */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		drm_connector_unregister(connector);
+}
+
+static int drm_connector_register_all(struct drm_device *dev)
 {
 	struct drm_connector *connector;
 	int ret;
@@ -1106,27 +1138,6 @@ err:
 	drm_connector_unregister_all(dev);
 	return ret;
 }
-EXPORT_SYMBOL(drm_connector_register_all);
-
-/**
- * drm_connector_unregister_all - unregister connector userspace interfaces
- * @dev: drm device
- *
- * This functions unregisters all connectors from sysfs and other places so
- * that userspace can no longer access them. Drivers should call this as the
- * first step tearing down the device instace, or when the underlying
- * physical device disappeared (e.g. USB unplug), right before calling
- * drm_dev_unregister().
- */
-void drm_connector_unregister_all(struct drm_device *dev)
-{
-	struct drm_connector *connector;
-
-	/* FIXME: taking the mode config mutex ends up in a clash with sysfs */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
-		drm_connector_unregister(connector);
-}
-EXPORT_SYMBOL(drm_connector_unregister_all);
 
 static int drm_encoder_register_all(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index d61591274ff6..604d3ef72ffa 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -531,11 +531,11 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
 	struct drm_device *dev;
-	struct drm_crtc *new_crtc;
-	struct drm_encoder *save_encoders, *new_encoder, *encoder;
+	struct drm_crtc **save_encoder_crtcs, *new_crtc;
+	struct drm_encoder **save_connector_encoders, *new_encoder, *encoder;
 	bool mode_changed = false; /* if true do a full mode set */
 	bool fb_changed = false; /* if true and !mode_changed just do a flip */
-	struct drm_connector *save_connectors, *connector;
+	struct drm_connector *connector;
 	int count = 0, ro, fail = 0;
 	const struct drm_crtc_helper_funcs *crtc_funcs;
 	struct drm_mode_set save_set;
@@ -577,15 +577,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 * Allocate space for the backup of all (non-pointer) encoder and
 	 * connector data.
 	 */
-	save_encoders = kzalloc(dev->mode_config.num_encoder *
-				sizeof(struct drm_encoder), GFP_KERNEL);
-	if (!save_encoders)
+	save_encoder_crtcs = kzalloc(dev->mode_config.num_encoder *
+				sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_encoder_crtcs)
 		return -ENOMEM;
 
-	save_connectors = kzalloc(dev->mode_config.num_connector *
-				sizeof(struct drm_connector), GFP_KERNEL);
-	if (!save_connectors) {
-		kfree(save_encoders);
+	save_connector_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoder *), GFP_KERNEL);
+	if (!save_connector_encoders) {
+		kfree(save_encoder_crtcs);
 		return -ENOMEM;
 	}
 
@@ -596,12 +596,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		save_encoders[count++] = *encoder;
+		save_encoder_crtcs[count++] = encoder->crtc;
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		save_connectors[count++] = *connector;
+		save_connector_encoders[count++] = connector->encoder;
 	}
 
 	save_set.crtc = set->crtc;
@@ -634,8 +634,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		mode_changed = true;
 	}
 
-	/* take a reference on all connectors in set */
+	/* take a reference on all unbound connectors in set, reuse the
+	 * already taken reference for bound connectors
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_reference(set->connectors[ro]);
 	}
 
@@ -757,30 +761,28 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/* after fail drop reference on all connectors in save set */
-	count = 0;
-	drm_for_each_connector(connector, dev) {
-		drm_connector_unreference(&save_connectors[count++]);
-	}
-
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return 0;
 
 fail:
 	/* Restore all previous data. */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		*encoder = save_encoders[count++];
+		encoder->crtc = save_encoder_crtcs[count++];
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		*connector = save_connectors[count++];
+		connector->encoder = save_connector_encoders[count++];
 	}
 
-	/* after fail drop reference on all connectors in set */
+	/* after fail drop reference on all unbound connectors in set, let
+	 * bound connectors keep their reference
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_unreference(set->connectors[ro]);
 	}
 
@@ -790,8 +792,8 @@ fail:
 				      save_set.y, save_set.fb))
 		DRM_ERROR("failed to restore config after modeset failure\n");
 
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return ret;
 }
 EXPORT_SYMBOL(drm_crtc_helper_set_config);
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 3334baacf43d..734f86a345f6 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -355,8 +355,7 @@ int drm_dp_aux_dev_init(void)
 
 	drm_dp_aux_dev_class = class_create(THIS_MODULE, "drm_dp_aux_dev");
 	if (IS_ERR(drm_dp_aux_dev_class)) {
-		res = PTR_ERR(drm_dp_aux_dev_class);
-		goto out;
+		return PTR_ERR(drm_dp_aux_dev_class);
 	}
 	drm_dp_aux_dev_class->dev_groups = drm_dp_aux_groups;
 
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 091053e995e5..8f11b8741e42 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -203,7 +203,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 
 		ret = aux->transfer(aux, &msg);
 
-		if (ret > 0) {
+		if (ret >= 0) {
 			native_reply = msg.reply & DP_AUX_NATIVE_REPLY_MASK;
 			if (native_reply == DP_AUX_NATIVE_REPLY_ACK) {
 				if (ret == size)
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a13edf5de2d6..04e457117980 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1493,11 +1493,8 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 	WARN_ON(!mutex_is_locked(&mgr->qlock));
 
 	/* construct a chunk from the first msg in the tx_msg queue */
-	if (list_empty(&mgr->tx_msg_downq)) {
-		mgr->tx_down_in_progress = false;
+	if (list_empty(&mgr->tx_msg_downq))
 		return;
-	}
-	mgr->tx_down_in_progress = true;
 
 	txmsg = list_first_entry(&mgr->tx_msg_downq, struct drm_dp_sideband_msg_tx, next);
 	ret = process_single_tx_qlock(mgr, txmsg, false);
@@ -1512,10 +1509,6 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 		txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
 		wake_up(&mgr->tx_waitq);
 	}
-	if (list_empty(&mgr->tx_msg_downq)) {
-		mgr->tx_down_in_progress = false;
-		return;
-	}
 }
 
 /* called holding qlock */
@@ -1538,7 +1531,7 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
 {
 	mutex_lock(&mgr->qlock);
 	list_add_tail(&txmsg->next, &mgr->tx_msg_downq);
-	if (!mgr->tx_down_in_progress)
+	if (list_is_singular(&mgr->tx_msg_downq))
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -2372,6 +2365,7 @@ EXPORT_SYMBOL(drm_dp_mst_hpd_irq);
 
 /**
  * drm_dp_mst_detect_port() - get connection status for an MST port
+ * @connector: DRM connector for this port
  * @mgr: manager for this port
  * @port: unverified pointer to a port
  *
@@ -2887,7 +2881,7 @@ static void drm_dp_tx_work(struct work_struct *work)
 	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
 
 	mutex_lock(&mgr->qlock);
-	if (mgr->tx_down_in_progress)
+	if (!list_empty(&mgr->tx_msg_downq))
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -2927,11 +2921,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
 		drm_dp_port_teardown_pdt(port, port->pdt);
 
 		if (!port->input && port->vcpi.vcpi > 0) {
-			if (mgr->mst_state) {
-				drm_dp_mst_reset_vcpi_slots(mgr, port);
-				drm_dp_update_payload_part1(mgr);
-				drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-			}
+			drm_dp_mst_reset_vcpi_slots(mgr, port);
+			drm_dp_update_payload_part1(mgr);
+			drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
 		}
 
 		kref_put(&port->kref, drm_dp_free_mst_port);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index aead9ffcbe29..be27ed36f56e 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -362,9 +362,7 @@ EXPORT_SYMBOL(drm_put_dev);
 void drm_unplug_dev(struct drm_device *dev)
 {
 	/* for a USB device */
-	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
-	drm_minor_unregister(dev, DRM_MINOR_RENDER);
-	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
+	drm_dev_unregister(dev);
 
 	mutex_lock(&drm_global_mutex);
 
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c0b0c718994a..1fd6eac1400c 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -596,3 +596,18 @@ void drm_fbdev_cma_hotplug_event(struct drm_fbdev_cma *fbdev_cma)
 		drm_fb_helper_hotplug_event(&fbdev_cma->fb_helper);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_hotplug_event);
+
+/**
+ * drm_fbdev_cma_set_suspend - wrapper around drm_fb_helper_set_suspend
+ * @fbdev_cma: The drm_fbdev_cma struct, may be NULL
+ * @state: desired state, zero to resume, non-zero to suspend
+ *
+ * Calls drm_fb_helper_set_suspend, which is a wrapper around
+ * fb_set_suspend implemented by fbdev core.
+ */
+void drm_fbdev_cma_set_suspend(struct drm_fbdev_cma *fbdev_cma, int state)
+{
+	if (fbdev_cma)
+		drm_fb_helper_set_suspend(&fbdev_cma->fb_helper, state);
+}
+EXPORT_SYMBOL(drm_fbdev_cma_set_suspend);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 1f84ff5f1bf8..33af4a5ddca1 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -648,7 +648,7 @@ long drm_ioctl(struct file *filp,
 	int retcode = -EINVAL;
 	char stack_kdata[128];
 	char *kdata = NULL;
-	unsigned int usize, asize, drv_size;
+	unsigned int in_size, out_size, drv_size, ksize;
 	bool is_driver_ioctl;
 
 	dev = file_priv->minor->dev;
@@ -671,9 +671,12 @@ long drm_ioctl(struct file *filp,
 	}
 
 	drv_size = _IOC_SIZE(ioctl->cmd);
-	usize = _IOC_SIZE(cmd);
-	asize = max(usize, drv_size);
-	cmd = ioctl->cmd;
+	out_size = in_size = _IOC_SIZE(cmd);
+	if ((cmd & ioctl->cmd & IOC_IN) == 0)
+		in_size = 0;
+	if ((cmd & ioctl->cmd & IOC_OUT) == 0)
+		out_size = 0;
+	ksize = max(max(in_size, out_size), drv_size);
 
 	DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n",
 		  task_pid_nr(current),
@@ -693,30 +696,24 @@ long drm_ioctl(struct file *filp,
 	if (unlikely(retcode))
 		goto err_i1;
 
-	if (cmd & (IOC_IN | IOC_OUT)) {
-		if (asize <= sizeof(stack_kdata)) {
-			kdata = stack_kdata;
-		} else {
-			kdata = kmalloc(asize, GFP_KERNEL);
-			if (!kdata) {
-				retcode = -ENOMEM;
-				goto err_i1;
-			}
+	if (ksize <= sizeof(stack_kdata)) {
+		kdata = stack_kdata;
+	} else {
+		kdata = kmalloc(ksize, GFP_KERNEL);
+		if (!kdata) {
+			retcode = -ENOMEM;
+			goto err_i1;
 		}
-		if (asize > usize)
-			memset(kdata + usize, 0, asize - usize);
 	}
 
-	if (cmd & IOC_IN) {
-		if (copy_from_user(kdata, (void __user *)arg,
-				   usize) != 0) {
-			retcode = -EFAULT;
-			goto err_i1;
-		}
-	} else if (cmd & IOC_OUT) {
-		memset(kdata, 0, usize);
+	if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) {
+		retcode = -EFAULT;
+		goto err_i1;
 	}
 
+	if (ksize > in_size)
+		memset(kdata + in_size, 0, ksize - in_size);
+
 	/* Enforce sane locking for kms driver ioctls. Core ioctls are
 	 * too messy still. */
 	if ((drm_core_check_feature(dev, DRIVER_MODESET) && is_driver_ioctl) ||
@@ -728,11 +725,8 @@ long drm_ioctl(struct file *filp,
 		mutex_unlock(&drm_global_mutex);
 	}
 
-	if (cmd & IOC_OUT) {
-		if (copy_to_user((void __user *)arg, kdata,
-				 usize) != 0)
-			retcode = -EFAULT;
-	}
+	if (copy_to_user((void __user *)arg, kdata, out_size) != 0)
+		retcode = -EFAULT;
 
       err_i1:
 	if (!ioctl)
@@ -759,7 +753,7 @@ EXPORT_SYMBOL(drm_ioctl);
  * shouldn't be used by any drivers.
  *
  * Returns:
- * True if the @nr corresponds to a DRM core ioctl numer, false otherwise.
+ * True if the @nr corresponds to a DRM core ioctl number, false otherwise.
  */
 bool drm_ioctl_flags(unsigned int nr, unsigned int *flags)
 {
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 8ca3d2bf2bda..77f357b2c386 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -532,7 +532,7 @@ int drm_irq_uninstall(struct drm_device *dev)
 
 	/*
 	 * Wake up any waiters so they don't hang. This is just to paper over
-	 * isssues for UMS drivers which aren't in full control of their
+	 * issues for UMS drivers which aren't in full control of their
 	 * vblank/irq handling. KMS drivers must ensure that vblanks are all
 	 * disabled when uninstalling the irq handler.
 	 */
@@ -594,7 +594,7 @@ int drm_control(struct drm_device *dev, void *data,
 		return 0;
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return 0;
-	/* UMS was only ever support on pci devices. */
+	/* UMS was only ever supported on pci devices. */
 	if (WARN_ON(!dev->pdev))
 		return -EINVAL;
 
@@ -945,8 +945,8 @@ EXPORT_SYMBOL(drm_crtc_vblank_count);
  *
  * This is the legacy version of drm_crtc_vblank_count_and_time().
  */
-u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
-			      struct timeval *vblanktime)
+static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
+				     struct timeval *vblanktime)
 {
 	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
 	u32 vblank_count;
@@ -963,7 +963,6 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
 
 	return vblank_count;
 }
-EXPORT_SYMBOL(drm_vblank_count_and_time);
 
 /**
  * drm_crtc_vblank_count_and_time - retrieve "cooked" vblank counter value
@@ -975,8 +974,6 @@ EXPORT_SYMBOL(drm_vblank_count_and_time);
  * vblank events since the system was booted, including lost events due to
  * modesetting activity. Returns corresponding system timestamp of the time
  * of the vblank interval that corresponds to the current vblank counter value.
- *
- * This is the native KMS version of drm_vblank_count_and_time().
  */
 u32 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
 				   struct timeval *vblanktime)
@@ -1588,12 +1585,6 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe,
 
 	seq = drm_vblank_count_and_time(dev, pipe, &now);
 
-	if ((vblwait->request.type & _DRM_VBLANK_NEXTONMISS) &&
-	    (seq - vblwait->request.sequence) <= (1 << 23)) {
-		vblwait->request.sequence = seq + 1;
-		vblwait->reply.sequence = vblwait->request.sequence;
-	}
-
 	DRM_DEBUG("event on vblank count %d, current %d, crtc %u\n",
 		  vblwait->request.sequence, seq, pipe);
 
@@ -1690,6 +1681,11 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		goto done;
 	}
 
+	if ((flags & _DRM_VBLANK_NEXTONMISS) &&
+	    (seq - vblwait->request.sequence) <= (1 << 23)) {
+		vblwait->request.sequence = seq + 1;
+	}
+
 	if (flags & _DRM_VBLANK_EVENT) {
 		/* must hold on to the vblank ref until the event fires
 		 * drm_vblank_put will be called asynchronously
@@ -1697,14 +1693,8 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		return drm_queue_vblank_event(dev, pipe, vblwait, file_priv);
 	}
 
-	if ((flags & _DRM_VBLANK_NEXTONMISS) &&
-	    (seq - vblwait->request.sequence) <= (1<<23)) {
-		vblwait->request.sequence = seq + 1;
-	}
-
 	DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
 		  vblwait->request.sequence, pipe);
-	vblank->last_wait = vblwait->request.sequence;
 	DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
 		    (((drm_vblank_count(dev, pipe) -
 		       vblwait->request.sequence) <= (1 << 23)) ||
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 87a8cb73366f..fc0ebd273ef8 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -44,7 +44,7 @@
 # include <asm/agp.h>
 #else
 # ifdef __powerpc__
-#  define PAGE_AGP	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
+#  define PAGE_AGP	pgprot_noncached_wc(PAGE_KERNEL)
 # else
 #  define PAGE_AGP	PAGE_KERNEL
 # endif
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 49311fc61d5d..af0d471ee246 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -999,17 +999,17 @@ int mipi_dsi_dcs_set_tear_on(struct mipi_dsi_device *dsi,
 EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_on);
 
 /**
- * mipi_dsi_set_tear_scanline() - turn on the display module's Tearing Effect
- * output signal on the TE signal line when display module reaches line N
- * defined by STS[n:0].
+ * mipi_dsi_dcs_set_tear_scanline() - set the scanline to use as trigger for
+ *    the Tearing Effect output signal of the display module
  * @dsi: DSI peripheral device
- * @param: STS[10:0]
+ * @scanline: scanline to use as trigger
+ *
  * Return: 0 on success or a negative error code on failure
  */
-int mipi_dsi_set_tear_scanline(struct mipi_dsi_device *dsi, u16 param)
+int mipi_dsi_dcs_set_tear_scanline(struct mipi_dsi_device *dsi, u16 scanline)
 {
-	u8 payload[3] = { MIPI_DCS_SET_TEAR_SCANLINE, param >> 8,
-			  param & 0xff };
+	u8 payload[3] = { MIPI_DCS_SET_TEAR_SCANLINE, scanline >> 8,
+			  scanline & 0xff };
 	ssize_t err;
 
 	err = mipi_dsi_generic_write(dsi, payload, sizeof(payload));
@@ -1018,7 +1018,7 @@ int mipi_dsi_set_tear_scanline(struct mipi_dsi_device *dsi, u16 param)
 
 	return 0;
 }
-EXPORT_SYMBOL(mipi_dsi_set_tear_scanline);
+EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_scanline);
 
 /**
  * mipi_dsi_dcs_set_pixel_format() - sets the pixel format for the RGB image
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index 4f0f3b36d537..bf70431073f6 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -41,7 +41,7 @@
 static inline void *drm_vmalloc_dma(unsigned long size)
 {
 #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL | _PAGE_NO_CACHE);
+	return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
 #else
 	return vmalloc_32(size);
 #endif
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 43ff44a2b8e7..caa4e4ca616d 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -80,7 +80,7 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
 	pgprot_t tmp = vm_get_page_prot(vma->vm_flags);
 
 #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-	tmp |= _PAGE_NO_CACHE;
+	tmp = pgprot_noncached_wc(tmp);
 #endif
 	return tmp;
 }
@@ -593,7 +593,7 @@ static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 			 * pages and mappings in fault()
 			 */
 #if defined(__powerpc__)
-			pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 #endif
 			vma->vm_ops = &drm_vm_ops;
 			break;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 340d390306d8..ffd1b32caa8d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -91,10 +91,8 @@ static void load_gpu(struct drm_device *dev)
 			int ret;
 
 			ret = etnaviv_gpu_init(g);
-			if (ret) {
-				dev_err(g->dev, "hw init failed: %d\n", ret);
+			if (ret)
 				priv->gpu[i] = NULL;
-			}
 		}
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index df9bcbab922f..56fb8637bb57 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -535,8 +535,7 @@ void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
 
 static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 {
-	if (etnaviv_obj->vaddr)
-		vunmap(etnaviv_obj->vaddr);
+	vunmap(etnaviv_obj->vaddr);
 	put_pages(etnaviv_obj);
 }
 
@@ -670,9 +669,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference_unlocked(obj);
-
+	drm_gem_object_unreference_unlocked(obj);
 	return ERR_PTR(ret);
 }
 
@@ -916,15 +913,12 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 	get_task_struct(current);
 
 	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
-	if (ret) {
-		drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
-		return ret;
-	}
+	if (ret)
+		goto unreference;
 
 	ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle);
-
+unreference:
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
-
 	return ret;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ff6aa5dfb2d7..87ef34150d46 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -487,6 +487,47 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	return 0;
 }
 
+static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
+{
+	u32 pmc, ppc;
+
+	/* enable clock gating */
+	ppc = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	ppc |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
+
+	/* Disable stall module clock gating for 4.3.0.1 and 4.3.0.2 revs */
+	if (gpu->identity.revision == 0x4301 ||
+	    gpu->identity.revision == 0x4302)
+		ppc |= VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING;
+
+	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, ppc);
+
+	pmc = gpu_read(gpu, VIVS_PM_MODULE_CONTROLS);
+
+	/* Disable PA clock gating for GC400+ except for GC420 */
+	if (gpu->identity.model >= chipModel_GC400 &&
+	    gpu->identity.model != chipModel_GC420)
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA;
+
+	/*
+	 * Disable PE clock gating on revs < 5.0.0.0 when HZ is
+	 * present without a bug fix.
+	 */
+	if (gpu->identity.revision < 0x5000 &&
+	    gpu->identity.minor_features0 & chipMinorFeatures0_HZ &&
+	    !(gpu->identity.minor_features1 &
+	      chipMinorFeatures1_DISABLE_PE_GATING))
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE;
+
+	if (gpu->identity.revision < 0x5422)
+		pmc |= BIT(15); /* Unknown bit */
+
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ;
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ;
+
+	gpu_write(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
+}
+
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
@@ -506,6 +547,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug);
 	}
 
+	/* enable module-level clock gating */
+	etnaviv_gpu_enable_mlcg(gpu);
+
 	/*
 	 * Update GPU AXI cache atttribute to "cacheable, no allocate".
 	 * This is necessary to prevent the iMX6 SoC locking up.
@@ -553,8 +597,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	bool mmuv2;
 
 	ret = pm_runtime_get_sync(gpu->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
 		return ret;
+	}
 
 	etnaviv_hw_identify(gpu);
 
@@ -591,8 +637,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	ret = etnaviv_hw_reset(gpu);
-	if (ret)
+	if (ret) {
+		dev_err(gpu->dev, "GPU reset failed\n");
 		goto fail;
+	}
 
 	/* Setup IOMMU.. eventually we will (I think) do this once per context
 	 * and have separate page tables per context.  For now, to keep things
@@ -610,12 +658,14 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	if (!iommu) {
+		dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
 
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
 		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 522cfd447892..16353ee81651 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -225,6 +225,7 @@ struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu)
 
 	etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING;
 	etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops;
+	etnaviv_domain->domain.pgsize_bitmap = SZ_4K;
 	etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START;
 	etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1;
 
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 6a7de5f1454a..807a3d9e0dd5 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -218,6 +218,13 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_FE	0x00000001
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_DE	0x00000002
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE	0x00000004
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SH	0x00000008
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA	0x00000010
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE	0x00000020
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA	0x00000040
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX	0x00000080
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ	0x00010000
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ	0x00020000
 
 #define VIVS_PM_MODULE_STATUS					0x00000108
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index d814b3048ee5..83f61c513b7e 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -2,10 +2,6 @@ config DRM_EXYNOS
 	tristate "DRM Support for Samsung SoC EXYNOS Series"
 	depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have a Samsung SoC EXYNOS chipset.
@@ -15,7 +11,7 @@ if DRM_EXYNOS
 
 config DRM_EXYNOS_IOMMU
 	bool
-	depends on EXYNOS_IOMMU && ARM_DMA_USE_IOMMU
+	depends on EXYNOS_IOMMU
 	default y
 
 comment "CRTCs"
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f6223f907c15..7f9901b7777b 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -31,7 +31,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 468498e3fec1..4f0850585b8e 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -34,7 +34,7 @@
 
 struct exynos_dp_device {
 	struct drm_encoder         encoder;
-	struct drm_connector       connector;
+	struct drm_connector       *connector;
 	struct drm_bridge          *ptn_bridge;
 	struct drm_device          *drm_dev;
 	struct device              *dev;
@@ -67,10 +67,10 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data)
 	return exynos_dp_crtc_clock_enable(plat_data, false);
 }
 
-static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data)
+static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data,
+			       struct drm_connector *connector)
 {
 	struct exynos_dp_device *dp = to_dp(plat_data);
-	struct drm_connector *connector = &dp->connector;
 	struct drm_display_mode *mode;
 	int num_modes = 0;
 
@@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 	int ret;
 
 	drm_connector_register(connector);
+	dp->connector = connector;
 
 	/* Pre-empt DP connector creation if there's a bridge */
 	if (dp->ptn_bridge) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 011211e4167d..edbd98ff293e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
-#include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 13d28d4229e2..877d2efa28e2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -159,12 +159,7 @@ static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 	DRM_INFO("Exynos DRM: using %s device for DMA mapping operations\n",
 		 dev_name(private->dma_dev));
 
-	/*
-	 * create mapping to manage iommu table and set a pointer to iommu
-	 * mapping structure to iommu_mapping of private data.
-	 * also this iommu_mapping can be used to check if iommu is supported
-	 * or not.
-	 */
+	/* create common IOMMU mapping for all devices attached to Exynos DRM */
 	ret = drm_create_iommu_mapping(dev);
 	if (ret < 0) {
 		DRM_ERROR("failed to create iommu mapping.\n");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cc33ec9296e7..b39d521f093d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -224,8 +224,6 @@ struct exynos_drm_private {
 	struct drm_property *plane_zpos_property;
 
 	struct device *dma_dev;
-	unsigned long da_start;
-	unsigned long da_space_size;
 	void *mapping;
 
 	unsigned int pipe;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 67dcd6831291..fb49443bfd32 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -269,8 +269,7 @@ static void exynos_drm_fbdev_destroy(struct drm_device *dev,
 	struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem;
 	struct drm_framebuffer *fb;
 
-	if (exynos_gem->kvaddr)
-		vunmap(exynos_gem->kvaddr);
+	vunmap(exynos_gem->kvaddr);
 
 	/* release drm framebuffer and real buffer */
 	if (fb_helper->fb && fb_helper->fb->funcs) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3efe1aa89416..d47216488985 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -30,7 +30,6 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
@@ -120,7 +119,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
 	.timing_base = 0x0,
 	.has_clksel = 1,
 	.has_limited_fmt = 1,
-	.has_hw_trigger = 1,
 };
 
 static struct fimd_driver_data exynos3_fimd_driver_data = {
@@ -171,14 +169,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
 	.lcdblk_vt_shift = 24,
 	.lcdblk_bypass_shift = 15,
 	.lcdblk_mic_bypass_shift = 11,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
 	.has_vtsel = 1,
 	.has_mic_bypass = 1,
 	.has_dp_clk = 1,
-	.has_hw_trigger = 1,
-	.has_trigger_per_te = 1,
 };
 
 struct fimd_context {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 493552368295..8564c3da0d22 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -48,13 +48,13 @@
 
 /* registers for base address */
 #define G2D_SRC_BASE_ADDR		0x0304
-#define G2D_SRC_STRIDE_REG		0x0308
+#define G2D_SRC_STRIDE			0x0308
 #define G2D_SRC_COLOR_MODE		0x030C
 #define G2D_SRC_LEFT_TOP		0x0310
 #define G2D_SRC_RIGHT_BOTTOM		0x0314
 #define G2D_SRC_PLANE2_BASE_ADDR	0x0318
 #define G2D_DST_BASE_ADDR		0x0404
-#define G2D_DST_STRIDE_REG		0x0408
+#define G2D_DST_STRIDE			0x0408
 #define G2D_DST_COLOR_MODE		0x040C
 #define G2D_DST_LEFT_TOP		0x0410
 #define G2D_DST_RIGHT_BOTTOM		0x0414
@@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 
 	switch (reg_offset) {
 	case G2D_SRC_BASE_ADDR:
-	case G2D_SRC_STRIDE_REG:
+	case G2D_SRC_STRIDE:
 	case G2D_SRC_COLOR_MODE:
 	case G2D_SRC_LEFT_TOP:
 	case G2D_SRC_RIGHT_BOTTOM:
@@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_SRC_PLANE2;
 		break;
 	case G2D_DST_BASE_ADDR:
-	case G2D_DST_STRIDE_REG:
+	case G2D_DST_STRIDE:
 	case G2D_DST_COLOR_MODE:
 	case G2D_DST_LEFT_TOP:
 	case G2D_DST_RIGHT_BOTTOM:
@@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev,
 			} else
 				buf_info->types[reg_type] = BUF_TYPE_GEM;
 			break;
-		case G2D_SRC_STRIDE_REG:
-		case G2D_DST_STRIDE_REG:
+		case G2D_SRC_STRIDE:
+		case G2D_DST_STRIDE:
 			if (for_addr)
 				goto err;
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.c b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
index 7ca09ee19656..0f373702414e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
@@ -14,13 +14,27 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
-#include <linux/kref.h>
-
-#include <asm/dma-iommu.h>
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_iommu.h"
 
+static inline int configure_dma_max_seg_size(struct device *dev)
+{
+	if (!dev->dma_parms)
+		dev->dma_parms = kzalloc(sizeof(*dev->dma_parms), GFP_KERNEL);
+	if (!dev->dma_parms)
+		return -ENOMEM;
+
+	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
+	return 0;
+}
+
+static inline void clear_dma_max_seg_size(struct device *dev)
+{
+	kfree(dev->dma_parms);
+	dev->dma_parms = NULL;
+}
+
 /*
  * drm_create_iommu_mapping - create a mapping structure
  *
@@ -28,38 +42,22 @@
  */
 int drm_create_iommu_mapping(struct drm_device *drm_dev)
 {
-	struct dma_iommu_mapping *mapping = NULL;
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	if (!priv->da_start)
-		priv->da_start = EXYNOS_DEV_ADDR_START;
-	if (!priv->da_space_size)
-		priv->da_space_size = EXYNOS_DEV_ADDR_SIZE;
-
-	mapping = arm_iommu_create_mapping(&platform_bus_type, priv->da_start,
-						priv->da_space_size);
-
-	if (IS_ERR(mapping))
-		return PTR_ERR(mapping);
-
-	priv->mapping = mapping;
-
-	return 0;
+	return __exynos_iommu_create_mapping(priv, EXYNOS_DEV_ADDR_START,
+					     EXYNOS_DEV_ADDR_SIZE);
 }
 
 /*
  * drm_release_iommu_mapping - release iommu mapping structure
  *
  * @drm_dev: DRM device
- *
- * if mapping->kref becomes 0 then all things related to iommu mapping
- * will be released
  */
 void drm_release_iommu_mapping(struct drm_device *drm_dev)
 {
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	arm_iommu_release_mapping(priv->mapping);
+	__exynos_iommu_release_mapping(priv);
 }
 
 /*
@@ -77,25 +75,19 @@ int drm_iommu_attach_device(struct drm_device *drm_dev,
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 	int ret;
 
-	if (!priv->mapping)
-		return 0;
-
-	subdrv_dev->dma_parms = devm_kzalloc(subdrv_dev,
-					sizeof(*subdrv_dev->dma_parms),
-					GFP_KERNEL);
-	if (!subdrv_dev->dma_parms)
-		return -ENOMEM;
-
-	dma_set_max_seg_size(subdrv_dev, 0xffffffffu);
-
-	if (subdrv_dev->archdata.mapping)
-		arm_iommu_detach_device(subdrv_dev);
+	if (get_dma_ops(priv->dma_dev) != get_dma_ops(subdrv_dev)) {
+		DRM_ERROR("Device %s lacks support for IOMMU\n",
+			  dev_name(subdrv_dev));
+		return -EINVAL;
+	}
 
-	ret = arm_iommu_attach_device(subdrv_dev, priv->mapping);
-	if (ret < 0) {
-		DRM_DEBUG_KMS("failed iommu attach.\n");
+	ret = configure_dma_max_seg_size(subdrv_dev);
+	if (ret)
 		return ret;
-	}
+
+	ret = __exynos_iommu_attach(priv, subdrv_dev);
+	if (ret)
+		clear_dma_max_seg_size(subdrv_dev);
 
 	return 0;
 }
@@ -113,10 +105,7 @@ void drm_iommu_detach_device(struct drm_device *drm_dev,
 				struct device *subdrv_dev)
 {
 	struct exynos_drm_private *priv = drm_dev->dev_private;
-	struct dma_iommu_mapping *mapping = priv->mapping;
-
-	if (!mapping || !mapping->domain)
-		return;
 
-	arm_iommu_detach_device(subdrv_dev);
+	__exynos_iommu_detach(priv, subdrv_dev);
+	clear_dma_max_seg_size(subdrv_dev);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index 5ffebe02ee4d..c8de4913fdbe 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -17,6 +17,97 @@
 
 #ifdef CONFIG_DRM_EXYNOS_IOMMU
 
+#if defined(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+
+static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
+					unsigned long start, unsigned long size)
+{
+	priv->mapping = arm_iommu_create_mapping(&platform_bus_type, start,
+						 size);
+	return IS_ERR(priv->mapping);
+}
+
+static inline void
+__exynos_iommu_release_mapping(struct exynos_drm_private *priv)
+{
+	arm_iommu_release_mapping(priv->mapping);
+}
+
+static inline int __exynos_iommu_attach(struct exynos_drm_private *priv,
+					struct device *dev)
+{
+	if (dev->archdata.mapping)
+		arm_iommu_detach_device(dev);
+
+	return arm_iommu_attach_device(dev, priv->mapping);
+}
+
+static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
+					 struct device *dev)
+{
+	arm_iommu_detach_device(dev);
+}
+
+#elif defined(CONFIG_IOMMU_DMA)
+#include <linux/dma-iommu.h>
+
+static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
+					unsigned long start, unsigned long size)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	domain = iommu_domain_alloc(priv->dma_dev->bus);
+	if (!domain)
+		return -ENOMEM;
+
+	ret = iommu_get_dma_cookie(domain);
+	if (ret)
+		goto free_domain;
+
+	ret = iommu_dma_init_domain(domain, start, size);
+	if (ret)
+		goto put_cookie;
+
+	priv->mapping = domain;
+	return 0;
+
+put_cookie:
+	iommu_put_dma_cookie(domain);
+free_domain:
+	iommu_domain_free(domain);
+	return ret;
+}
+
+static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	iommu_put_dma_cookie(domain);
+	iommu_domain_free(domain);
+	priv->mapping = NULL;
+}
+
+static inline int __exynos_iommu_attach(struct exynos_drm_private *priv,
+					struct device *dev)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	return iommu_attach_device(domain, dev);
+}
+
+static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
+					 struct device *dev)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	iommu_detach_device(domain, dev);
+}
+#else
+#error Unsupported architecture and IOMMU/DMA-mapping glue code
+#endif
+
 int drm_create_iommu_mapping(struct drm_device *drm_dev);
 
 void drm_release_iommu_mapping(struct drm_device *drm_dev);
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 1625d7c8a319..2275efe41acd 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1820,6 +1820,7 @@ static int hdmi_probe(struct platform_device *pdev)
 		DRM_ERROR("Failed to find ddc node in device tree\n");
 		return -ENODEV;
 	}
+	of_node_put(dev->of_node);
 
 out_get_ddc_adpt:
 	hdata->ddc_adpt = of_find_i2c_adapter_by_node(ddc_node);
@@ -1838,6 +1839,7 @@ out_get_ddc_adpt:
 		ret = -ENODEV;
 		goto err_ddc;
 	}
+	of_node_put(dev->of_node);
 
 out_get_phy_port:
 	if (hdata->drv_data->is_apb_phy) {
diff --git a/drivers/gpu/drm/fsl-dcu/Kconfig b/drivers/gpu/drm/fsl-dcu/Kconfig
index b9c714de6e40..14a72c4c496d 100644
--- a/drivers/gpu/drm/fsl-dcu/Kconfig
+++ b/drivers/gpu/drm/fsl-dcu/Kconfig
@@ -5,12 +5,7 @@ config DRM_FSL_DCU
 	select BACKLIGHT_LCD_SUPPORT
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_PANEL
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
 	select REGMAP_MMIO
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index 706de3278f1c..3371635cd4d7 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -44,6 +44,8 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 
+	drm_crtc_vblank_off(crtc);
+
 	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
 			   DCU_MODE_DCU_MODE_MASK,
 			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
@@ -61,6 +63,8 @@ static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
 			   DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
 	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
 		     DCU_UPDATE_MODE_READREG);
+
+	drm_crtc_vblank_on(crtc);
 }
 
 static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
@@ -137,9 +141,10 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 {
 	struct drm_plane *primary;
 	struct drm_crtc *crtc = &fsl_dev->crtc;
-	unsigned int i, j, reg_num;
 	int ret;
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+
 	primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm);
 	if (!primary)
 		return -ENOMEM;
@@ -153,19 +158,5 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 
 	drm_crtc_helper_add(crtc, &fsl_dcu_drm_crtc_helper_funcs);
 
-	if (!strcmp(fsl_dev->soc->name, "ls1021a"))
-		reg_num = LS1021A_LAYER_REG_NUM;
-	else
-		reg_num = VF610_LAYER_REG_NUM;
-	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
-		for (j = 1; j <= reg_num; j++)
-			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
-	}
-	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-			   DCU_MODE_DCU_MODE_MASK,
-			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-		     DCU_UPDATE_MODE_READREG);
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 33727d5d826a..7882387f9bff 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -11,6 +11,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/console.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mm.h>
@@ -22,6 +23,7 @@
 #include <linux/regmap.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
@@ -42,7 +44,6 @@ static const struct regmap_config fsl_dcu_regmap_config = {
 	.reg_bits = 32,
 	.reg_stride = 4,
 	.val_bits = 32,
-	.cache_type = REGCACHE_RBTREE,
 
 	.volatile_reg = fsl_dcu_drm_is_volatile_reg,
 };
@@ -228,11 +229,26 @@ static int fsl_dcu_drm_pm_suspend(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
+	disable_irq(fsl_dev->irq);
 	drm_kms_helper_poll_disable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, true);
-	regcache_mark_dirty(fsl_dev->regmap);
-	clk_disable(fsl_dev->clk);
-	clk_unprepare(fsl_dev->clk);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 1);
+	console_unlock();
+
+	fsl_dev->state = drm_atomic_helper_suspend(fsl_dev->drm);
+	if (IS_ERR(fsl_dev->state)) {
+		console_lock();
+		drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+		console_unlock();
+
+		drm_kms_helper_poll_enable(fsl_dev->drm);
+		enable_irq(fsl_dev->irq);
+		return PTR_ERR(fsl_dev->state);
+	}
+
+	clk_disable_unprepare(fsl_dev->pix_clk);
+	clk_disable_unprepare(fsl_dev->clk);
 
 	return 0;
 }
@@ -245,21 +261,27 @@ static int fsl_dcu_drm_pm_resume(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
-	ret = clk_enable(fsl_dev->clk);
+	ret = clk_prepare_enable(fsl_dev->clk);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable dcu clk\n");
-		clk_unprepare(fsl_dev->clk);
 		return ret;
 	}
-	ret = clk_prepare(fsl_dev->clk);
+
+	ret = clk_prepare_enable(fsl_dev->pix_clk);
 	if (ret < 0) {
-		dev_err(dev, "failed to prepare dcu clk\n");
+		dev_err(dev, "failed to enable pix clk\n");
 		return ret;
 	}
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+	drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+	console_unlock();
+
 	drm_kms_helper_poll_enable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, false);
-	regcache_sync(fsl_dev->regmap);
+	enable_irq(fsl_dev->irq);
 
 	return 0;
 }
@@ -273,12 +295,14 @@ static const struct fsl_dcu_soc_data fsl_dcu_ls1021a_data = {
 	.name = "ls1021a",
 	.total_layer = 16,
 	.max_layer = 4,
+	.layer_regs = LS1021A_LAYER_REG_NUM,
 };
 
 static const struct fsl_dcu_soc_data fsl_dcu_vf610_data = {
 	.name = "vf610",
 	.total_layer = 64,
 	.max_layer = 6,
+	.layer_regs = VF610_LAYER_REG_NUM,
 };
 
 static const struct of_device_id fsl_dcu_of_match[] = {
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
index c275f900ff23..3b371fe7491e 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
@@ -175,6 +175,7 @@ struct fsl_dcu_soc_data {
 	unsigned int total_layer;
 	/*max layer number DCU supported*/
 	unsigned int max_layer;
+	unsigned int layer_regs;
 };
 
 struct fsl_dcu_drm_device {
@@ -193,6 +194,7 @@ struct fsl_dcu_drm_device {
 	struct drm_encoder encoder;
 	struct fsl_dcu_drm_connector connector;
 	const struct fsl_dcu_soc_data *soc;
+	struct drm_atomic_state *state;
 };
 
 void fsl_dcu_fbdev_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
index a6e4cd591960..d9d6cc1c8e39 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
@@ -43,7 +43,7 @@ int fsl_dcu_drm_modeset_init(struct fsl_dcu_drm_device *fsl_dev)
 	if (ret)
 		goto err;
 
-	ret = fsl_dcu_drm_connector_create(fsl_dev, &fsl_dev->encoder);
+	ret = fsl_dcu_create_outputs(fsl_dev);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
index 7093109fbc21..5a7b88e19e44 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
@@ -25,9 +25,8 @@ to_fsl_dcu_connector(struct drm_connector *con)
 		     : NULL;
 }
 
-int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
-				 struct drm_encoder *encoder);
 int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev,
 			       struct drm_crtc *crtc);
+int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev);
 
 #endif /* __FSL_DCU_DRM_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
index 274558b3b32b..e50467a0deb0 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
@@ -217,6 +217,22 @@ static const u32 fsl_dcu_drm_plane_formats[] = {
 	DRM_FORMAT_YUV422,
 };
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev)
+{
+	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
+	int i, j;
+
+	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
+		for (j = 1; j <= fsl_dev->soc->layer_regs; j++)
+			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
+	}
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
+}
+
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev)
 {
 	struct drm_plane *primary;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
index d657f088d859..8ee45f813ee8 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
@@ -12,6 +12,7 @@
 #ifndef __FSL_DCU_DRM_PLANE_H__
 #define __FSL_DCU_DRM_PLANE_H__
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev);
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev);
 
 #endif /* __FSL_DCU_DRM_PLANE_H__ */
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index 0b0989e503ea..26edcc899712 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/of_graph.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
@@ -132,12 +133,12 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.mode_valid = fsl_dcu_drm_connector_mode_valid,
 };
 
-int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
-				 struct drm_encoder *encoder)
+static int fsl_dcu_attach_panel(struct fsl_dcu_drm_device *fsl_dev,
+				 struct drm_panel *panel)
 {
+	struct drm_encoder *encoder = &fsl_dev->encoder;
 	struct drm_connector *connector = &fsl_dev->connector.base;
 	struct drm_mode_config *mode_config = &fsl_dev->drm->mode_config;
-	struct device_node *panel_node;
 	int ret;
 
 	fsl_dev->connector.encoder = encoder;
@@ -161,21 +162,7 @@ int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
 				      mode_config->dpms_property,
 				      DRM_MODE_DPMS_OFF);
 
-	panel_node = of_parse_phandle(fsl_dev->np, "fsl,panel", 0);
-	if (!panel_node) {
-		dev_err(fsl_dev->dev, "fsl,panel property not found\n");
-		ret = -ENODEV;
-		goto err_sysfs;
-	}
-
-	fsl_dev->connector.panel = of_drm_find_panel(panel_node);
-	if (!fsl_dev->connector.panel) {
-		ret = -EPROBE_DEFER;
-		goto err_panel;
-	}
-	of_node_put(panel_node);
-
-	ret = drm_panel_attach(fsl_dev->connector.panel, connector);
+	ret = drm_panel_attach(panel, connector);
 	if (ret) {
 		dev_err(fsl_dev->dev, "failed to attach panel\n");
 		goto err_sysfs;
@@ -183,11 +170,62 @@ int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
 
 	return 0;
 
-err_panel:
-	of_node_put(panel_node);
 err_sysfs:
 	drm_connector_unregister(connector);
 err_cleanup:
 	drm_connector_cleanup(connector);
 	return ret;
 }
+
+static int fsl_dcu_attach_endpoint(struct fsl_dcu_drm_device *fsl_dev,
+				   const struct of_endpoint *ep)
+{
+	struct drm_bridge *bridge;
+	struct device_node *np;
+
+	np = of_graph_get_remote_port_parent(ep->local_node);
+
+	fsl_dev->connector.panel = of_drm_find_panel(np);
+	if (fsl_dev->connector.panel) {
+		of_node_put(np);
+		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
+	}
+
+	bridge = of_drm_find_bridge(np);
+	of_node_put(np);
+	if (!bridge)
+		return -ENODEV;
+
+	fsl_dev->encoder.bridge = bridge;
+	bridge->encoder = &fsl_dev->encoder;
+
+	return drm_bridge_attach(fsl_dev->drm, bridge);
+}
+
+int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev)
+{
+	struct of_endpoint ep;
+	struct device_node *ep_node, *panel_node;
+	int ret;
+
+	/* This is for backward compatibility */
+	panel_node = of_parse_phandle(fsl_dev->np, "fsl,panel", 0);
+	if (panel_node) {
+		fsl_dev->connector.panel = of_drm_find_panel(panel_node);
+		of_node_put(panel_node);
+		if (!fsl_dev->connector.panel)
+			return -EPROBE_DEFER;
+		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
+	}
+
+	ep_node = of_graph_get_next_endpoint(fsl_dev->np, NULL);
+	if (!ep_node)
+		return -ENODEV;
+
+	ret = of_graph_parse_endpoint(ep_node, &ep);
+	of_node_put(ep_node);
+	if (ret)
+		return -ENODEV;
+
+	return fsl_dcu_attach_endpoint(fsl_dev, &ep);
+}
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
index bbe34f1c0505..bca09ea24632 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
@@ -92,6 +92,7 @@ struct fsl_tcon *fsl_tcon_init(struct device *dev)
 		goto err_node_put;
 	}
 
+	of_node_put(np);
 	clk_prepare_enable(tcon->ipg_clk);
 
 	dev_info(dev, "Using TCON in bypass mode\n");
diff --git a/drivers/gpu/drm/gma500/Kconfig b/drivers/gpu/drm/gma500/Kconfig
index 17f928ec84ea..8906d67494fc 100644
--- a/drivers/gpu/drm/gma500/Kconfig
+++ b/drivers/gpu/drm/gma500/Kconfig
@@ -1,11 +1,7 @@
 config DRM_GMA500
 	tristate "Intel GMA5/600 KMS Framebuffer"
 	depends on DRM && PCI && X86
-	select FB_CFB_COPYAREA
-	select FB_CFB_FILLRECT
-	select FB_CFB_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	# GMA500 depends on ACPI_VIDEO when ACPI is enabled, just like i915
 	select ACPI_VIDEO if ACPI
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 28f9d90988ff..563f193fcfac 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -246,8 +246,7 @@ static void cdv_hdmi_destroy(struct drm_connector *connector)
 {
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index 813ef23a8054..38dc89083148 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -444,8 +444,7 @@ static void cdv_intel_lvds_destroy(struct drm_connector *connector)
 {
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -780,12 +779,10 @@ out:
 failed_find:
 	mutex_unlock(&dev->mode_config.mutex);
 	printk(KERN_ERR "Failed find\n");
-	if (gma_encoder->ddc_bus)
-		psb_intel_i2c_destroy(gma_encoder->ddc_bus);
+	psb_intel_i2c_destroy(gma_encoder->ddc_bus);
 failed_ddc:
 	printk(KERN_ERR "Failed DDC\n");
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 failed_blc_i2c:
 	printk(KERN_ERR "Failed BLC\n");
 	drm_encoder_cleanup(encoder);
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 7440bf90ac9c..0fcdce0817de 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -184,12 +184,6 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int psbfb_ioctl(struct fb_info *info, unsigned int cmd,
-						unsigned long arg)
-{
-	return -ENOTTY;
-}
-
 static struct fb_ops psbfb_ops = {
 	.owner = THIS_MODULE,
 	.fb_check_var = drm_fb_helper_check_var,
@@ -201,7 +195,6 @@ static struct fb_ops psbfb_ops = {
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_mmap = psbfb_mmap,
 	.fb_sync = psbfb_sync,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 static struct fb_ops psbfb_roll_ops = {
@@ -215,7 +208,6 @@ static struct fb_ops psbfb_roll_ops = {
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_pan_display = psbfb_pan,
 	.fb_mmap = psbfb_mmap,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 static struct fb_ops psbfb_unaccel_ops = {
@@ -228,7 +220,6 @@ static struct fb_ops psbfb_unaccel_ops = {
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_mmap = psbfb_mmap,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 /**
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 82b8ce418b27..50eb944fb78a 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -210,10 +210,8 @@ static int psb_driver_unload(struct drm_device *dev)
 			iounmap(dev_priv->aux_reg);
 			dev_priv->aux_reg = NULL;
 		}
-		if (dev_priv->aux_pdev)
-			pci_dev_put(dev_priv->aux_pdev);
-		if (dev_priv->lpc_pdev)
-			pci_dev_put(dev_priv->lpc_pdev);
+		pci_dev_put(dev_priv->aux_pdev);
+		pci_dev_put(dev_priv->lpc_pdev);
 
 		/* Destroy VBT data */
 		psb_intel_destroy_bios(dev);
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index b1b93317d054..e55733ca46d2 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -561,8 +561,7 @@ void psb_intel_lvds_destroy(struct drm_connector *connector)
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct psb_intel_lvds_priv *lvds_priv = gma_encoder->dev_priv;
 
-	if (lvds_priv->ddc_bus)
-		psb_intel_i2c_destroy(lvds_priv->ddc_bus);
+	psb_intel_i2c_destroy(lvds_priv->ddc_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -835,11 +834,9 @@ out:
 
 failed_find:
 	mutex_unlock(&dev->mode_config.mutex);
-	if (lvds_priv->ddc_bus)
-		psb_intel_i2c_destroy(lvds_priv->ddc_bus);
+	psb_intel_i2c_destroy(lvds_priv->ddc_bus);
 failed_ddc:
-	if (lvds_priv->i2c_bus)
-		psb_intel_i2c_destroy(lvds_priv->i2c_bus);
+	psb_intel_i2c_destroy(lvds_priv->i2c_bus);
 failed_blc_i2c:
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig
index ea0df6115f7e..499f64405dac 100644
--- a/drivers/gpu/drm/hisilicon/kirin/Kconfig
+++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig
@@ -4,6 +4,7 @@ config DRM_HISI_KIRIN
 	select DRM_KMS_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
+	select HISI_KIRIN_DW_DSI
 	help
 	  Choose this option if you have a hisilicon Kirin chipsets(hi6220).
 	  If M is selected the module will be called kirin-drm.
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index ed76baad525f..c3707d47cd89 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -487,6 +487,7 @@ static void ade_crtc_enable(struct drm_crtc *crtc)
 	ade_set_medianoc_qos(acrtc);
 	ade_display_enable(acrtc);
 	ade_dump_regs(ctx->base);
+	drm_crtc_vblank_on(crtc);
 	acrtc->enable = true;
 }
 
@@ -498,6 +499,7 @@ static void ade_crtc_disable(struct drm_crtc *crtc)
 	if (!acrtc->enable)
 		return;
 
+	drm_crtc_vblank_off(crtc);
 	ade_power_down(ctx);
 	acrtc->enable = false;
 }
@@ -965,21 +967,21 @@ static int ade_dts_parse(struct platform_device *pdev, struct ade_hw_ctx *ctx)
 	}
 
 	ctx->ade_core_clk = devm_clk_get(dev, "clk_ade_core");
-	if (!ctx->ade_core_clk) {
+	if (IS_ERR(ctx->ade_core_clk)) {
 		DRM_ERROR("failed to parse clk ADE_CORE\n");
-		return -ENODEV;
+		return PTR_ERR(ctx->ade_core_clk);
 	}
 
 	ctx->media_noc_clk = devm_clk_get(dev, "clk_codec_jpeg");
-	if (!ctx->media_noc_clk) {
+	if (IS_ERR(ctx->media_noc_clk)) {
 		DRM_ERROR("failed to parse clk CODEC_JPEG\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->media_noc_clk);
 	}
 
 	ctx->ade_pix_clk = devm_clk_get(dev, "clk_ade_pix");
-	if (!ctx->ade_pix_clk) {
+	if (IS_ERR(ctx->ade_pix_clk)) {
 		DRM_ERROR("failed to parse clk ADE_PIX\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->ade_pix_clk);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index 22c7ed63a001..4d341db462a2 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -1,12 +1,6 @@
 menu "I2C encoder or helper chips"
      depends on DRM && DRM_KMS_HELPER && I2C
 
-config DRM_I2C_ADV7511
-	tristate "AV7511 encoder"
-	select REGMAP_I2C
-	help
-	  Support for the Analog Device ADV7511(W) and ADV7513 HDMI encoders.
-
 config DRM_I2C_CH7006
 	tristate "Chrontel ch7006 TV encoder"
 	default m if DRM_NOUVEAU
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 2c72eb584ab7..43aa33baebed 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -1,7 +1,5 @@
 ccflags-y := -Iinclude/drm
 
-obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o
-
 ch7006-y := ch7006_drv.o ch7006_mode.o
 obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o
 
diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c
index 0594c45f7164..e9e8ae2ec06b 100644
--- a/drivers/gpu/drm/i2c/ch7006_drv.c
+++ b/drivers/gpu/drm/i2c/ch7006_drv.c
@@ -361,13 +361,8 @@ static int ch7006_encoder_set_property(struct drm_encoder *encoder,
 
 		/* Disable the crtc to ensure a full modeset is
 		 * performed whenever it's turned on again. */
-		if (crtc) {
-			struct drm_mode_set modeset = {
-				.crtc = crtc,
-			};
-
-			drm_mode_set_config_internal(&modeset);
-		}
+		if (crtc)
+			drm_crtc_force_disable(crtc);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 1ea0e1f43397..fb27d187876c 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -34,15 +34,15 @@
  * low-power state and comes back to normal.
  */
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
 #define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
 
-#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
+#define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
-#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 26)
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
 
-#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin"
+#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 361977fd1170..a07da548ff49 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -928,6 +928,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 						uint32_t *batch,
 						uint32_t index)
 {
+	struct drm_i915_private *dev_priv = engine->i915;
 	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	/*
@@ -936,8 +937,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_E0) ||
-	    IS_KBL_REVID(engine->i915, 0, KBL_REVID_E0))
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
+	    IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
@@ -1077,11 +1078,12 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 				    uint32_t *offset)
 {
 	int ret;
+	struct drm_i915_private *dev_priv = engine->i915;
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_D0) ||
-	    IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1099,7 +1101,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */
-	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
 		uint32_t scratch_addr
 			= engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 948f24418eac..6bd352a8f30e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -57,7 +57,7 @@
 
 static void gen9_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
 	I915_WRITE(CHICKEN_PAR1_1,
@@ -7044,7 +7044,7 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
 
 static void kabylake_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	gen9_init_clock_gating(dev);
 
@@ -7065,7 +7065,7 @@ static void kabylake_init_clock_gating(struct drm_device *dev)
 
 static void skylake_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	gen9_init_clock_gating(dev);
 
diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig
index a1844b50546c..f2c9ae822149 100644
--- a/drivers/gpu/drm/imx/Kconfig
+++ b/drivers/gpu/drm/imx/Kconfig
@@ -1,7 +1,6 @@
 config DRM_IMX
 	tristate "DRM Support for Freescale i.MX"
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select VIDEOMODE_HELPERS
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index a24631fdf4ad..359cd2765552 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -28,6 +28,11 @@ struct imx_hdmi {
 	struct regmap *regmap;
 };
 
+static inline struct imx_hdmi *enc_to_imx_hdmi(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_hdmi, encoder);
+}
+
 static const struct dw_hdmi_mpll_config imx_mpll_cfg[] = {
 	{
 		45250000, {
@@ -109,15 +114,9 @@ static void dw_hdmi_imx_encoder_disable(struct drm_encoder *encoder)
 {
 }
 
-static void dw_hdmi_imx_encoder_mode_set(struct drm_encoder *encoder,
-					 struct drm_display_mode *mode,
-					 struct drm_display_mode *adj_mode)
+static void dw_hdmi_imx_encoder_enable(struct drm_encoder *encoder)
 {
-}
-
-static void dw_hdmi_imx_encoder_commit(struct drm_encoder *encoder)
-{
-	struct imx_hdmi *hdmi = container_of(encoder, struct imx_hdmi, encoder);
+	struct imx_hdmi *hdmi = enc_to_imx_hdmi(encoder);
 	int mux = drm_of_encoder_active_port_id(hdmi->dev->of_node, encoder);
 
 	regmap_update_bits(hdmi->regmap, IOMUXC_GPR3,
@@ -125,16 +124,23 @@ static void dw_hdmi_imx_encoder_commit(struct drm_encoder *encoder)
 			   mux << IMX6Q_GPR3_HDMI_MUX_CTL_SHIFT);
 }
 
-static void dw_hdmi_imx_encoder_prepare(struct drm_encoder *encoder)
+static int dw_hdmi_imx_atomic_check(struct drm_encoder *encoder,
+				    struct drm_crtc_state *crtc_state,
+				    struct drm_connector_state *conn_state)
 {
-	imx_drm_set_bus_format(encoder, MEDIA_BUS_FMT_RGB888_1X24);
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+
+	imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
 }
 
 static const struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = {
-	.mode_set   = dw_hdmi_imx_encoder_mode_set,
-	.prepare    = dw_hdmi_imx_encoder_prepare,
-	.commit     = dw_hdmi_imx_encoder_commit,
+	.enable     = dw_hdmi_imx_encoder_enable,
 	.disable    = dw_hdmi_imx_encoder_disable,
+	.atomic_check = dw_hdmi_imx_atomic_check,
 };
 
 static const struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = {
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 7746418a4c08..9f7dafce3a4c 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -15,10 +15,14 @@
  */
 #include <linux/component.h>
 #include <linux/device.h>
+#include <linux/dma-buf.h>
 #include <linux/fb.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/reservation.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
@@ -41,6 +45,7 @@ struct imx_drm_device {
 	struct imx_drm_crtc			*crtc[MAX_CRTC];
 	unsigned int				pipes;
 	struct drm_fbdev_cma			*fbhelper;
+	struct drm_atomic_state			*state;
 };
 
 struct imx_drm_crtc {
@@ -85,45 +90,6 @@ static int imx_drm_driver_unload(struct drm_device *drm)
 	return 0;
 }
 
-static struct imx_drm_crtc *imx_drm_find_crtc(struct drm_crtc *crtc)
-{
-	struct imx_drm_device *imxdrm = crtc->dev->dev_private;
-	unsigned i;
-
-	for (i = 0; i < MAX_CRTC; i++)
-		if (imxdrm->crtc[i] && imxdrm->crtc[i]->crtc == crtc)
-			return imxdrm->crtc[i];
-
-	return NULL;
-}
-
-int imx_drm_set_bus_config(struct drm_encoder *encoder, u32 bus_format,
-		int hsync_pin, int vsync_pin, u32 bus_flags)
-{
-	struct imx_drm_crtc_helper_funcs *helper;
-	struct imx_drm_crtc *imx_crtc;
-
-	imx_crtc = imx_drm_find_crtc(encoder->crtc);
-	if (!imx_crtc)
-		return -EINVAL;
-
-	helper = &imx_crtc->imx_drm_helper_funcs;
-	if (helper->set_interface_pix_fmt)
-		return helper->set_interface_pix_fmt(encoder->crtc,
-					bus_format, hsync_pin, vsync_pin,
-					bus_flags);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(imx_drm_set_bus_config);
-
-int imx_drm_set_bus_format(struct drm_encoder *encoder, u32 bus_format)
-{
-	return imx_drm_set_bus_config(encoder, bus_format, 2, 3,
-				      DRM_BUS_FLAG_DE_HIGH |
-				      DRM_BUS_FLAG_PIXDATA_NEGEDGE);
-}
-EXPORT_SYMBOL_GPL(imx_drm_set_bus_format);
-
 int imx_drm_crtc_vblank_get(struct imx_drm_crtc *imx_drm_crtc)
 {
 	return drm_crtc_vblank_get(imx_drm_crtc->crtc);
@@ -208,6 +174,63 @@ static void imx_drm_output_poll_changed(struct drm_device *drm)
 static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
 	.fb_create = drm_fb_cma_create,
 	.output_poll_changed = imx_drm_output_poll_changed,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane_state *plane_state;
+	struct drm_gem_cma_object *cma_obj;
+	struct fence *excl;
+	unsigned shared_count;
+	struct fence **shared;
+	unsigned int i, j;
+	int ret;
+
+	/* Wait for fences. */
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		plane_state = crtc->primary->state;
+		if (plane_state->fb) {
+			cma_obj = drm_fb_cma_get_gem_obj(plane_state->fb, 0);
+			if (cma_obj->base.dma_buf) {
+				ret = reservation_object_get_fences_rcu(
+					cma_obj->base.dma_buf->resv, &excl,
+					&shared_count, &shared);
+				if (unlikely(ret))
+					DRM_ERROR("failed to get fences "
+						  "for buffer\n");
+
+				if (excl) {
+					fence_wait(excl, false);
+					fence_put(excl);
+				}
+				for (j = 0; j < shared_count; i++) {
+					fence_wait(shared[j], false);
+					fence_put(shared[j]);
+				}
+			}
+		}
+	}
+
+	drm_atomic_helper_commit_modeset_disables(dev, state);
+
+	drm_atomic_helper_commit_planes(dev, state, true);
+
+	drm_atomic_helper_commit_modeset_enables(dev, state);
+
+	drm_atomic_helper_commit_hw_done(state);
+
+	drm_atomic_helper_wait_for_vblanks(dev, state);
+
+	drm_atomic_helper_cleanup_planes(dev, state);
+}
+
+static struct drm_mode_config_helper_funcs imx_drm_mode_config_helpers = {
+	.atomic_commit_tail = imx_drm_atomic_commit_tail,
 };
 
 /*
@@ -249,6 +272,7 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 	drm->mode_config.max_width = 4096;
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &imx_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &imx_drm_mode_config_helpers;
 
 	drm_mode_config_init(drm);
 
@@ -279,6 +303,8 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 		}
 	}
 
+	drm_mode_config_reset(drm);
+
 	/*
 	 * All components are now initialised, so setup the fb helper.
 	 * The fb helper takes copies of key hardware information, so the
@@ -289,7 +315,6 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 		dev_warn(drm->dev, "Invalid legacyfb_depth.  Defaulting to 16bpp\n");
 		legacyfb_depth = 16;
 	}
-	drm_helper_disable_unused_functions(drm);
 	imxdrm->fbhelper = drm_fbdev_cma_init(drm, legacyfb_depth,
 				drm->mode_config.num_crtc, MAX_CRTC);
 	if (IS_ERR(imxdrm->fbhelper)) {
@@ -403,7 +428,8 @@ static const struct drm_ioctl_desc imx_drm_ioctls[] = {
 };
 
 static struct drm_driver imx_drm_driver = {
-	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME |
+				  DRIVER_ATOMIC,
 	.load			= imx_drm_driver_load,
 	.unload			= imx_drm_driver_unload,
 	.lastclose		= imx_drm_driver_lastclose,
@@ -491,6 +517,7 @@ static int imx_drm_platform_remove(struct platform_device *pdev)
 static int imx_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct imx_drm_device *imxdrm;
 
 	/* The drm_dev is NULL before .load hook is called */
 	if (drm_dev == NULL)
@@ -498,17 +525,26 @@ static int imx_drm_suspend(struct device *dev)
 
 	drm_kms_helper_poll_disable(drm_dev);
 
+	imxdrm = drm_dev->dev_private;
+	imxdrm->state = drm_atomic_helper_suspend(drm_dev);
+	if (IS_ERR(imxdrm->state)) {
+		drm_kms_helper_poll_enable(drm_dev);
+		return PTR_ERR(imxdrm->state);
+	}
+
 	return 0;
 }
 
 static int imx_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct imx_drm_device *imx_drm;
 
 	if (drm_dev == NULL)
 		return 0;
 
-	drm_helper_resume_force_mode(drm_dev);
+	imx_drm = drm_dev->dev_private;
+	drm_atomic_helper_resume(drm_dev, imx_drm->state);
 	drm_kms_helper_poll_enable(drm_dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 74320a1723b7..07d33e45f90f 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -15,12 +15,22 @@ struct platform_device;
 
 unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc);
 
+struct imx_crtc_state {
+	struct drm_crtc_state			base;
+	u32					bus_format;
+	u32					bus_flags;
+	int					di_hsync_pin;
+	int					di_vsync_pin;
+};
+
+static inline struct imx_crtc_state *to_imx_crtc_state(struct drm_crtc_state *s)
+{
+	return container_of(s, struct imx_crtc_state, base);
+}
+
 struct imx_drm_crtc_helper_funcs {
 	int (*enable_vblank)(struct drm_crtc *crtc);
 	void (*disable_vblank)(struct drm_crtc *crtc);
-	int (*set_interface_pix_fmt)(struct drm_crtc *crtc,
-			u32 bus_format, int hsync_pin, int vsync_pin,
-			u32 bus_flags);
 	const struct drm_crtc_helper_funcs *crtc_helper_funcs;
 	const struct drm_crtc_funcs *crtc_funcs;
 };
@@ -42,11 +52,6 @@ void imx_drm_mode_config_init(struct drm_device *drm);
 
 struct drm_gem_cma_object *imx_drm_fb_get_obj(struct drm_framebuffer *fb);
 
-int imx_drm_set_bus_config(struct drm_encoder *encoder, u32 bus_format,
-		int hsync_pin, int vsync_pin, u32 bus_flags);
-int imx_drm_set_bus_format(struct drm_encoder *encoder,
-		u32 bus_format);
-
 int imx_drm_encoder_parse_of(struct drm_device *drm,
 	struct drm_encoder *encoder, struct device_node *np);
 
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index beff793bb717..b03919ed60ba 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -17,6 +17,8 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_of.h>
@@ -49,9 +51,6 @@
 #define LDB_DI1_VS_POL_ACT_LOW		(1 << 10)
 #define LDB_BGREF_RMODE_INT		(1 << 15)
 
-#define con_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, connector)
-#define enc_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, encoder)
-
 struct imx_ldb;
 
 struct imx_ldb_channel {
@@ -66,9 +65,19 @@ struct imx_ldb_channel {
 	int edid_len;
 	struct drm_display_mode mode;
 	int mode_valid;
-	int bus_format;
+	u32 bus_format;
 };
 
+static inline struct imx_ldb_channel *con_to_imx_ldb_ch(struct drm_connector *c)
+{
+	return container_of(c, struct imx_ldb_channel, connector);
+}
+
+static inline struct imx_ldb_channel *enc_to_imx_ldb_ch(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_ldb_channel, encoder);
+}
+
 struct bus_mux {
 	int reg;
 	int shift;
@@ -93,6 +102,32 @@ static enum drm_connector_status imx_ldb_connector_detect(
 	return connector_status_connected;
 }
 
+static void imx_ldb_ch_set_bus_format(struct imx_ldb_channel *imx_ldb_ch,
+				      u32 bus_format)
+{
+	struct imx_ldb *ldb = imx_ldb_ch->ldb;
+	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
+
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
+		if (imx_ldb_ch->chno == 0 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24;
+		if (imx_ldb_ch->chno == 1 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24;
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
+		if (imx_ldb_ch->chno == 0 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24 |
+					 LDB_BIT_MAP_CH0_JEIDA;
+		if (imx_ldb_ch->chno == 1 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24 |
+					 LDB_BIT_MAP_CH1_JEIDA;
+		break;
+	}
+}
+
 static int imx_ldb_connector_get_modes(struct drm_connector *connector)
 {
 	struct imx_ldb_channel *imx_ldb_ch = con_to_imx_ldb_ch(connector);
@@ -100,11 +135,7 @@ static int imx_ldb_connector_get_modes(struct drm_connector *connector)
 
 	if (imx_ldb_ch->panel && imx_ldb_ch->panel->funcs &&
 	    imx_ldb_ch->panel->funcs->get_modes) {
-		struct drm_display_info *di = &connector->display_info;
-
 		num_modes = imx_ldb_ch->panel->funcs->get_modes(imx_ldb_ch->panel);
-		if (!imx_ldb_ch->bus_format && di->num_bus_formats)
-			imx_ldb_ch->bus_format = di->bus_formats[0];
 		if (num_modes > 0)
 			return num_modes;
 	}
@@ -141,10 +172,6 @@ static struct drm_encoder *imx_ldb_connector_best_encoder(
 	return &imx_ldb_ch->encoder;
 }
 
-static void imx_ldb_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-}
-
 static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
 		unsigned long serial_clk, unsigned long di_clk)
 {
@@ -173,43 +200,7 @@ static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
 			chno);
 }
 
-static void imx_ldb_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
-	struct imx_ldb *ldb = imx_ldb_ch->ldb;
-	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
-	u32 bus_format;
-
-	switch (imx_ldb_ch->bus_format) {
-	default:
-		dev_warn(ldb->dev,
-			 "could not determine data mapping, default to 18-bit \"spwg\"\n");
-		/* fallthrough */
-	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
-		bus_format = MEDIA_BUS_FMT_RGB666_1X18;
-		break;
-	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
-		bus_format = MEDIA_BUS_FMT_RGB888_1X24;
-		if (imx_ldb_ch->chno == 0 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24;
-		if (imx_ldb_ch->chno == 1 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24;
-		break;
-	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
-		bus_format = MEDIA_BUS_FMT_RGB888_1X24;
-		if (imx_ldb_ch->chno == 0 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24 |
-					 LDB_BIT_MAP_CH0_JEIDA;
-		if (imx_ldb_ch->chno == 1 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24 |
-					 LDB_BIT_MAP_CH1_JEIDA;
-		break;
-	}
-
-	imx_drm_set_bus_format(encoder, bus_format);
-}
-
-static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
+static void imx_ldb_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
 	struct imx_ldb *ldb = imx_ldb_ch->ldb;
@@ -219,8 +210,13 @@ static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
 	drm_panel_prepare(imx_ldb_ch->panel);
 
 	if (dual) {
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[0]);
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[1]);
+
 		clk_prepare_enable(ldb->clk[0]);
 		clk_prepare_enable(ldb->clk[1]);
+	} else {
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[imx_ldb_ch->chno]);
 	}
 
 	if (imx_ldb_ch == &ldb->channel[0] || dual) {
@@ -265,6 +261,7 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
 	unsigned long serial_clk;
 	unsigned long di_clk = mode->clock * 1000;
 	int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
+	u32 bus_format = imx_ldb_ch->bus_format;
 
 	if (mode->clock > 170000) {
 		dev_warn(ldb->dev,
@@ -286,18 +283,33 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* FIXME - assumes straight connections DI0 --> CH0, DI1 --> CH1 */
-	if (imx_ldb_ch == &ldb->channel[0]) {
+	if (imx_ldb_ch == &ldb->channel[0] || dual) {
 		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 			ldb->ldb_ctrl |= LDB_DI0_VS_POL_ACT_LOW;
 		else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
 			ldb->ldb_ctrl &= ~LDB_DI0_VS_POL_ACT_LOW;
 	}
-	if (imx_ldb_ch == &ldb->channel[1]) {
+	if (imx_ldb_ch == &ldb->channel[1] || dual) {
 		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 			ldb->ldb_ctrl |= LDB_DI1_VS_POL_ACT_LOW;
 		else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
 			ldb->ldb_ctrl &= ~LDB_DI1_VS_POL_ACT_LOW;
 	}
+
+	if (!bus_format) {
+		struct drm_connector *connector;
+
+		drm_for_each_connector(connector, encoder->dev) {
+			struct drm_display_info *di = &connector->display_info;
+
+			if (connector->encoder == encoder &&
+			    di->num_bus_formats) {
+				bus_format = di->bus_formats[0];
+				break;
+			}
+		}
+	}
+	imx_ldb_ch_set_bus_format(imx_ldb_ch, bus_format);
 }
 
 static void imx_ldb_encoder_disable(struct drm_encoder *encoder)
@@ -357,11 +369,45 @@ static void imx_ldb_encoder_disable(struct drm_encoder *encoder)
 	drm_panel_unprepare(imx_ldb_ch->panel);
 }
 
+static int imx_ldb_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+	struct drm_display_info *di = &conn_state->connector->display_info;
+	u32 bus_format = imx_ldb_ch->bus_format;
+
+	/* Bus format description in DT overrides connector display info. */
+	if (!bus_format && di->num_bus_formats)
+		bus_format = di->bus_formats[0];
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
+		imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
+	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
+		imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
+}
+
+
 static const struct drm_connector_funcs imx_ldb_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_ldb_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = {
@@ -374,11 +420,10 @@ static const struct drm_encoder_funcs imx_ldb_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = {
-	.dpms = imx_ldb_encoder_dpms,
-	.prepare = imx_ldb_encoder_prepare,
-	.commit = imx_ldb_encoder_commit,
 	.mode_set = imx_ldb_encoder_mode_set,
+	.enable = imx_ldb_encoder_enable,
 	.disable = imx_ldb_encoder_disable,
+	.atomic_check = imx_ldb_encoder_atomic_check,
 };
 
 static int imx_ldb_get_clk(struct imx_ldb *ldb, int chno)
@@ -400,10 +445,10 @@ static int imx_ldb_register(struct drm_device *drm,
 	struct imx_ldb_channel *imx_ldb_ch)
 {
 	struct imx_ldb *ldb = imx_ldb_ch->ldb;
+	struct drm_encoder *encoder = &imx_ldb_ch->encoder;
 	int ret;
 
-	ret = imx_drm_encoder_parse_of(drm, &imx_ldb_ch->encoder,
-				       imx_ldb_ch->child);
+	ret = imx_drm_encoder_parse_of(drm, encoder, imx_ldb_ch->child);
 	if (ret)
 		return ret;
 
@@ -417,9 +462,8 @@ static int imx_ldb_register(struct drm_device *drm,
 			return ret;
 	}
 
-	drm_encoder_helper_add(&imx_ldb_ch->encoder,
-			&imx_ldb_encoder_helper_funcs);
-	drm_encoder_init(drm, &imx_ldb_ch->encoder, &imx_ldb_encoder_funcs,
+	drm_encoder_helper_add(encoder, &imx_ldb_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &imx_ldb_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	drm_connector_helper_add(&imx_ldb_ch->connector,
@@ -427,11 +471,14 @@ static int imx_ldb_register(struct drm_device *drm,
 	drm_connector_init(drm, &imx_ldb_ch->connector,
 			   &imx_ldb_connector_funcs, DRM_MODE_CONNECTOR_LVDS);
 
-	if (imx_ldb_ch->panel)
-		drm_panel_attach(imx_ldb_ch->panel, &imx_ldb_ch->connector);
+	if (imx_ldb_ch->panel) {
+		ret = drm_panel_attach(imx_ldb_ch->panel,
+				       &imx_ldb_ch->connector);
+		if (ret)
+			return ret;
+	}
 
-	drm_mode_connector_attach_encoder(&imx_ldb_ch->connector,
-			&imx_ldb_ch->encoder);
+	drm_mode_connector_attach_encoder(&imx_ldb_ch->connector, encoder);
 
 	return 0;
 }
@@ -560,6 +607,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		struct imx_ldb_channel *channel;
 		struct device_node *ddc_node;
 		struct device_node *ep;
+		int bus_format;
 
 		ret = of_property_read_u32(child, "reg", &i);
 		if (ret || i < 0 || i > 1)
@@ -632,21 +680,22 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 			}
 		}
 
-		channel->bus_format = of_get_bus_format(dev, child);
-		if (channel->bus_format == -EINVAL) {
+		bus_format = of_get_bus_format(dev, child);
+		if (bus_format == -EINVAL) {
 			/*
 			 * If no bus format was specified in the device tree,
 			 * we can still get it from the connected panel later.
 			 */
 			if (channel->panel && channel->panel->funcs &&
 			    channel->panel->funcs->get_modes)
-				channel->bus_format = 0;
+				bus_format = 0;
 		}
-		if (channel->bus_format < 0) {
+		if (bus_format < 0) {
 			dev_err(dev, "could not determine data mapping: %d\n",
-				channel->bus_format);
-			return channel->bus_format;
+				bus_format);
+			return bus_format;
 		}
+		channel->bus_format = bus_format;
 
 		ret = imx_ldb_register(drm, channel);
 		if (ret)
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index baf788121287..5e875944ffa2 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/videodev2.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <video/imx-ipu-v3.h>
@@ -97,9 +98,6 @@
 /* TVE_TST_MODE_REG */
 #define TVE_TVDAC_TEST_MODE_MASK	(0x7 << 0)
 
-#define con_to_tve(x) container_of(x, struct imx_tve, connector)
-#define enc_to_tve(x) container_of(x, struct imx_tve, encoder)
-
 enum {
 	TVE_MODE_TVOUT,
 	TVE_MODE_VGA,
@@ -112,6 +110,8 @@ struct imx_tve {
 	spinlock_t lock;	/* register lock */
 	bool enabled;
 	int mode;
+	int di_hsync_pin;
+	int di_vsync_pin;
 
 	struct regmap *regmap;
 	struct regulator *dac_reg;
@@ -120,10 +120,18 @@ struct imx_tve {
 	struct clk *di_sel_clk;
 	struct clk_hw clk_hw_di;
 	struct clk *di_clk;
-	int vsync_pin;
-	int hsync_pin;
 };
 
+static inline struct imx_tve *con_to_tve(struct drm_connector *c)
+{
+	return container_of(c, struct imx_tve, connector);
+}
+
+static inline struct imx_tve *enc_to_tve(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_tve, encoder);
+}
+
 static void tve_lock(void *__tve)
 __acquires(&tve->lock)
 {
@@ -148,8 +156,7 @@ static void tve_enable(struct imx_tve *tve)
 		tve->enabled = true;
 		clk_prepare_enable(tve->clk);
 		ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-					 TVE_IPU_CLK_EN | TVE_EN,
-					 TVE_IPU_CLK_EN | TVE_EN);
+					 TVE_EN, TVE_EN);
 	}
 
 	/* clear interrupt status register */
@@ -172,7 +179,7 @@ static void tve_disable(struct imx_tve *tve)
 	if (tve->enabled) {
 		tve->enabled = false;
 		ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-					 TVE_IPU_CLK_EN | TVE_EN, 0);
+					 TVE_EN, 0);
 		clk_disable_unprepare(tve->clk);
 	}
 }
@@ -275,36 +282,6 @@ static struct drm_encoder *imx_tve_connector_best_encoder(
 	return &tve->encoder;
 }
 
-static void imx_tve_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct imx_tve *tve = enc_to_tve(encoder);
-	int ret;
-
-	ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-				 TVE_TV_OUT_MODE_MASK, TVE_TV_OUT_DISABLE);
-	if (ret < 0)
-		dev_err(tve->dev, "failed to disable TVOUT: %d\n", ret);
-}
-
-static void imx_tve_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_tve *tve = enc_to_tve(encoder);
-
-	tve_disable(tve);
-
-	switch (tve->mode) {
-	case TVE_MODE_VGA:
-		imx_drm_set_bus_config(encoder, MEDIA_BUS_FMT_GBR888_1X24,
-				       tve->hsync_pin, tve->vsync_pin,
-				       DRM_BUS_FLAG_DE_HIGH |
-				       DRM_BUS_FLAG_PIXDATA_NEGEDGE);
-		break;
-	case TVE_MODE_TVOUT:
-		imx_drm_set_bus_format(encoder, MEDIA_BUS_FMT_YUV8_1X24);
-		break;
-	}
-}
-
 static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 				     struct drm_display_mode *orig_mode,
 				     struct drm_display_mode *mode)
@@ -333,6 +310,9 @@ static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 			ret);
 	}
 
+	regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
+			   TVE_IPU_CLK_EN, TVE_IPU_CLK_EN);
+
 	if (tve->mode == TVE_MODE_VGA)
 		ret = tve_setup_vga(tve);
 	else
@@ -341,7 +321,7 @@ static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 		dev_err(tve->dev, "failed to set configuration: %d\n", ret);
 }
 
-static void imx_tve_encoder_commit(struct drm_encoder *encoder)
+static void imx_tve_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_tve *tve = enc_to_tve(encoder);
 
@@ -355,11 +335,28 @@ static void imx_tve_encoder_disable(struct drm_encoder *encoder)
 	tve_disable(tve);
 }
 
+static int imx_tve_atomic_check(struct drm_encoder *encoder,
+				struct drm_crtc_state *crtc_state,
+				struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct imx_tve *tve = enc_to_tve(encoder);
+
+	imx_crtc_state->bus_format = MEDIA_BUS_FMT_GBR888_1X24;
+	imx_crtc_state->di_hsync_pin = tve->di_hsync_pin;
+	imx_crtc_state->di_vsync_pin = tve->di_vsync_pin;
+
+	return 0;
+}
+
 static const struct drm_connector_funcs imx_tve_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_tve_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_tve_connector_helper_funcs = {
@@ -373,11 +370,10 @@ static const struct drm_encoder_funcs imx_tve_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = {
-	.dpms = imx_tve_encoder_dpms,
-	.prepare = imx_tve_encoder_prepare,
 	.mode_set = imx_tve_encoder_mode_set,
-	.commit = imx_tve_encoder_commit,
+	.enable = imx_tve_encoder_enable,
 	.disable = imx_tve_encoder_disable,
+	.atomic_check = imx_tve_atomic_check,
 };
 
 static irqreturn_t imx_tve_irq_handler(int irq, void *data)
@@ -495,8 +491,7 @@ static int imx_tve_register(struct drm_device *drm, struct imx_tve *tve)
 	encoder_type = tve->mode == TVE_MODE_VGA ?
 				DRM_MODE_ENCODER_DAC : DRM_MODE_ENCODER_TVDAC;
 
-	ret = imx_drm_encoder_parse_of(drm, &tve->encoder,
-				       tve->dev->of_node);
+	ret = imx_drm_encoder_parse_of(drm, &tve->encoder, tve->dev->of_node);
 	if (ret)
 		return ret;
 
@@ -587,15 +582,15 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	if (tve->mode == TVE_MODE_VGA) {
 		ret = of_property_read_u32(np, "fsl,hsync-pin",
-					   &tve->hsync_pin);
+					   &tve->di_hsync_pin);
 
 		if (ret < 0) {
-			dev_err(dev, "failed to get vsync pin\n");
+			dev_err(dev, "failed to get hsync pin\n");
 			return ret;
 		}
 
-		ret |= of_property_read_u32(np, "fsl,vsync-pin",
-					    &tve->vsync_pin);
+		ret = of_property_read_u32(np, "fsl,vsync-pin",
+					   &tve->di_vsync_pin);
 
 		if (ret < 0) {
 			dev_err(dev, "failed to get vsync pin\n");
@@ -633,7 +628,9 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	tve->dac_reg = devm_regulator_get(dev, "dac");
 	if (!IS_ERR(tve->dac_reg)) {
-		regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
+		ret = regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
+		if (ret)
+			return ret;
 		ret = regulator_enable(tve->dac_reg);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index fc040417e1e8..08e188bc10fc 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -18,12 +18,12 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <linux/fb.h>
 #include <linux/clk.h>
 #include <linux/errno.h>
-#include <linux/reservation.h>
-#include <linux/dma-buf.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 
@@ -33,23 +33,6 @@
 
 #define DRIVER_DESC		"i.MX IPUv3 Graphics"
 
-enum ipu_flip_status {
-	IPU_FLIP_NONE,
-	IPU_FLIP_PENDING,
-	IPU_FLIP_SUBMITTED,
-};
-
-struct ipu_flip_work {
-	struct work_struct		unref_work;
-	struct drm_gem_object		*bo;
-	struct drm_pending_vblank_event *page_flip_event;
-	struct work_struct		fence_work;
-	struct ipu_crtc			*crtc;
-	struct fence			*excl;
-	unsigned			shared_count;
-	struct fence			**shared;
-};
-
 struct ipu_crtc {
 	struct device		*dev;
 	struct drm_crtc		base;
@@ -60,201 +43,166 @@ struct ipu_crtc {
 
 	struct ipu_dc		*dc;
 	struct ipu_di		*di;
-	int			enabled;
-	enum ipu_flip_status	flip_state;
-	struct workqueue_struct *flip_queue;
-	struct ipu_flip_work	*flip_work;
 	int			irq;
-	u32			bus_format;
-	u32			bus_flags;
-	int			di_hsync_pin;
-	int			di_vsync_pin;
 };
 
-#define to_ipu_crtc(x) container_of(x, struct ipu_crtc, base)
+static inline struct ipu_crtc *to_ipu_crtc(struct drm_crtc *crtc)
+{
+	return container_of(crtc, struct ipu_crtc, base);
+}
 
-static void ipu_fb_enable(struct ipu_crtc *ipu_crtc)
+static void ipu_crtc_enable(struct drm_crtc *crtc)
 {
+	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
 
-	if (ipu_crtc->enabled)
-		return;
-
 	ipu_dc_enable(ipu);
-	ipu_plane_enable(ipu_crtc->plane[0]);
-	/* Start DC channel and DI after IDMAC */
 	ipu_dc_enable_channel(ipu_crtc->dc);
 	ipu_di_enable(ipu_crtc->di);
-	drm_crtc_vblank_on(&ipu_crtc->base);
-
-	ipu_crtc->enabled = 1;
 }
 
-static void ipu_fb_disable(struct ipu_crtc *ipu_crtc)
+static void ipu_crtc_disable(struct drm_crtc *crtc)
 {
+	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
 
-	if (!ipu_crtc->enabled)
-		return;
-
-	/* Stop DC channel and DI before IDMAC */
 	ipu_dc_disable_channel(ipu_crtc->dc);
 	ipu_di_disable(ipu_crtc->di);
-	ipu_plane_disable(ipu_crtc->plane[0]);
 	ipu_dc_disable(ipu);
-	drm_crtc_vblank_off(&ipu_crtc->base);
 
-	ipu_crtc->enabled = 0;
+	spin_lock_irq(&crtc->dev->event_lock);
+	if (crtc->state->event) {
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+	spin_unlock_irq(&crtc->dev->event_lock);
 }
 
-static void ipu_crtc_dpms(struct drm_crtc *crtc, int mode)
+static void imx_drm_crtc_reset(struct drm_crtc *crtc)
 {
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+	struct imx_crtc_state *state;
 
-	dev_dbg(ipu_crtc->dev, "%s mode: %d\n", __func__, mode);
-
-	switch (mode) {
-	case DRM_MODE_DPMS_ON:
-		ipu_fb_enable(ipu_crtc);
-		break;
-	case DRM_MODE_DPMS_STANDBY:
-	case DRM_MODE_DPMS_SUSPEND:
-	case DRM_MODE_DPMS_OFF:
-		ipu_fb_disable(ipu_crtc);
-		break;
+	if (crtc->state) {
+		if (crtc->state->mode_blob)
+			drm_property_unreference_blob(crtc->state->mode_blob);
+
+		state = to_imx_crtc_state(crtc->state);
+		memset(state, 0, sizeof(*state));
+	} else {
+		state = kzalloc(sizeof(*state), GFP_KERNEL);
+		if (!state)
+			return;
+		crtc->state = &state->base;
 	}
+
+	state->base.crtc = crtc;
 }
 
-static void ipu_flip_unref_work_func(struct work_struct *__work)
+static struct drm_crtc_state *imx_drm_crtc_duplicate_state(struct drm_crtc *crtc)
 {
-	struct ipu_flip_work *work =
-			container_of(__work, struct ipu_flip_work, unref_work);
+	struct imx_crtc_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
 
-	drm_gem_object_unreference_unlocked(work->bo);
-	kfree(work);
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
+
+	WARN_ON(state->base.crtc != crtc);
+	state->base.crtc = crtc;
+
+	return &state->base;
 }
 
-static void ipu_flip_fence_work_func(struct work_struct *__work)
+static void imx_drm_crtc_destroy_state(struct drm_crtc *crtc,
+				       struct drm_crtc_state *state)
 {
-	struct ipu_flip_work *work =
-			container_of(__work, struct ipu_flip_work, fence_work);
-	int i;
-
-	/* wait for all fences attached to the FB obj to signal */
-	if (work->excl) {
-		fence_wait(work->excl, false);
-		fence_put(work->excl);
-	}
-	for (i = 0; i < work->shared_count; i++) {
-		fence_wait(work->shared[i], false);
-		fence_put(work->shared[i]);
-	}
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(to_imx_crtc_state(state));
+}
+
+static const struct drm_crtc_funcs ipu_crtc_funcs = {
+	.set_config = drm_atomic_helper_set_config,
+	.destroy = drm_crtc_cleanup,
+	.page_flip = drm_atomic_helper_page_flip,
+	.reset = imx_drm_crtc_reset,
+	.atomic_duplicate_state = imx_drm_crtc_duplicate_state,
+	.atomic_destroy_state = imx_drm_crtc_destroy_state,
+};
 
-	work->crtc->flip_state = IPU_FLIP_SUBMITTED;
+static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
+{
+	struct ipu_crtc *ipu_crtc = dev_id;
+
+	imx_drm_handle_vblank(ipu_crtc->imx_crtc);
+
+	return IRQ_HANDLED;
 }
 
-static int ipu_page_flip(struct drm_crtc *crtc,
-		struct drm_framebuffer *fb,
-		struct drm_pending_vblank_event *event,
-		uint32_t page_flip_flags)
+static bool ipu_crtc_mode_fixup(struct drm_crtc *crtc,
+				  const struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
 {
-	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-	struct ipu_flip_work *flip_work;
+	struct videomode vm;
 	int ret;
 
-	if (ipu_crtc->flip_state != IPU_FLIP_NONE)
-		return -EBUSY;
-
-	ret = imx_drm_crtc_vblank_get(ipu_crtc->imx_crtc);
-	if (ret) {
-		dev_dbg(ipu_crtc->dev, "failed to acquire vblank counter\n");
-		list_del(&event->base.link);
-
-		return ret;
-	}
+	drm_display_mode_to_videomode(adjusted_mode, &vm);
 
-	flip_work = kzalloc(sizeof *flip_work, GFP_KERNEL);
-	if (!flip_work) {
-		ret = -ENOMEM;
-		goto put_vblank;
-	}
-	INIT_WORK(&flip_work->unref_work, ipu_flip_unref_work_func);
-	flip_work->page_flip_event = event;
+	ret = ipu_di_adjust_videomode(ipu_crtc->di, &vm);
+	if (ret)
+		return false;
 
-	/* get BO backing the old framebuffer and take a reference */
-	flip_work->bo = &drm_fb_cma_get_gem_obj(crtc->primary->fb, 0)->base;
-	drm_gem_object_reference(flip_work->bo);
+	if ((vm.vsync_len == 0) || (vm.hsync_len == 0))
+		return false;
 
-	ipu_crtc->flip_work = flip_work;
-	/*
-	 * If the object has a DMABUF attached, we need to wait on its fences
-	 * if there are any.
-	 */
-	if (cma_obj->base.dma_buf) {
-		INIT_WORK(&flip_work->fence_work, ipu_flip_fence_work_func);
-		flip_work->crtc = ipu_crtc;
+	drm_display_mode_from_videomode(&vm, adjusted_mode);
 
-		ret = reservation_object_get_fences_rcu(
-				cma_obj->base.dma_buf->resv, &flip_work->excl,
-				&flip_work->shared_count, &flip_work->shared);
+	return true;
+}
 
-		if (unlikely(ret)) {
-			DRM_ERROR("failed to get fences for buffer\n");
-			goto free_flip_work;
-		}
+static int ipu_crtc_atomic_check(struct drm_crtc *crtc,
+				 struct drm_crtc_state *state)
+{
+	u32 primary_plane_mask = 1 << drm_plane_index(crtc->primary);
 
-		/* No need to queue the worker if the are no fences */
-		if (!flip_work->excl && !flip_work->shared_count) {
-			ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
-		} else {
-			ipu_crtc->flip_state = IPU_FLIP_PENDING;
-			queue_work(ipu_crtc->flip_queue,
-				   &flip_work->fence_work);
-		}
-	} else {
-		ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
-	}
+	if (state->active && (primary_plane_mask & state->plane_mask) == 0)
+		return -EINVAL;
 
 	return 0;
-
-free_flip_work:
-	drm_gem_object_unreference_unlocked(flip_work->bo);
-	kfree(flip_work);
-	ipu_crtc->flip_work = NULL;
-put_vblank:
-	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
-
-	return ret;
 }
 
-static const struct drm_crtc_funcs ipu_crtc_funcs = {
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = drm_crtc_cleanup,
-	.page_flip = ipu_page_flip,
-};
+static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
+				  struct drm_crtc_state *old_crtc_state)
+{
+	spin_lock_irq(&crtc->dev->event_lock);
+	if (crtc->state->event) {
+		WARN_ON(drm_crtc_vblank_get(crtc));
+		drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+	spin_unlock_irq(&crtc->dev->event_lock);
+}
 
-static int ipu_crtc_mode_set(struct drm_crtc *crtc,
-			       struct drm_display_mode *orig_mode,
-			       struct drm_display_mode *mode,
-			       int x, int y,
-			       struct drm_framebuffer *old_fb)
+static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc->state);
 	struct ipu_di_signal_cfg sig_cfg = {};
 	unsigned long encoder_types = 0;
-	int ret;
 
 	dev_dbg(ipu_crtc->dev, "%s: mode->hdisplay: %d\n", __func__,
 			mode->hdisplay);
 	dev_dbg(ipu_crtc->dev, "%s: mode->vdisplay: %d\n", __func__,
 			mode->vdisplay);
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc)
 			encoder_types |= BIT(encoder->encoder_type);
+	}
 
 	dev_dbg(ipu_crtc->dev, "%s: attached to encoder types 0x%lx\n",
 		__func__, encoder_types);
@@ -272,114 +220,30 @@ static int ipu_crtc_mode_set(struct drm_crtc *crtc,
 	else
 		sig_cfg.clkflags = 0;
 
-	sig_cfg.enable_pol = !(ipu_crtc->bus_flags & DRM_BUS_FLAG_DE_LOW);
+	sig_cfg.enable_pol = !(imx_crtc_state->bus_flags & DRM_BUS_FLAG_DE_LOW);
 	/* Default to driving pixel data on negative clock edges */
-	sig_cfg.clk_pol = !!(ipu_crtc->bus_flags &
+	sig_cfg.clk_pol = !!(imx_crtc_state->bus_flags &
 			     DRM_BUS_FLAG_PIXDATA_POSEDGE);
-	sig_cfg.bus_format = ipu_crtc->bus_format;
+	sig_cfg.bus_format = imx_crtc_state->bus_format;
 	sig_cfg.v_to_h_sync = 0;
-	sig_cfg.hsync_pin = ipu_crtc->di_hsync_pin;
-	sig_cfg.vsync_pin = ipu_crtc->di_vsync_pin;
+	sig_cfg.hsync_pin = imx_crtc_state->di_hsync_pin;
+	sig_cfg.vsync_pin = imx_crtc_state->di_vsync_pin;
 
 	drm_display_mode_to_videomode(mode, &sig_cfg.mode);
 
-	ret = ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di,
-			       mode->flags & DRM_MODE_FLAG_INTERLACE,
-			       ipu_crtc->bus_format, mode->hdisplay);
-	if (ret) {
-		dev_err(ipu_crtc->dev,
-				"initializing display controller failed with %d\n",
-				ret);
-		return ret;
-	}
-
-	ret = ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg);
-	if (ret) {
-		dev_err(ipu_crtc->dev,
-				"initializing panel failed with %d\n", ret);
-		return ret;
-	}
-
-	return ipu_plane_mode_set(ipu_crtc->plane[0], crtc, mode,
-				  crtc->primary->fb,
-				  0, 0, mode->hdisplay, mode->vdisplay,
-				  x, y, mode->hdisplay, mode->vdisplay,
-				  mode->flags & DRM_MODE_FLAG_INTERLACE);
-}
-
-static void ipu_crtc_handle_pageflip(struct ipu_crtc *ipu_crtc)
-{
-	unsigned long flags;
-	struct drm_device *drm = ipu_crtc->base.dev;
-	struct ipu_flip_work *work = ipu_crtc->flip_work;
-
-	spin_lock_irqsave(&drm->event_lock, flags);
-	if (work->page_flip_event)
-		drm_crtc_send_vblank_event(&ipu_crtc->base,
-					   work->page_flip_event);
-	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
-	spin_unlock_irqrestore(&drm->event_lock, flags);
-}
-
-static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
-{
-	struct ipu_crtc *ipu_crtc = dev_id;
-
-	imx_drm_handle_vblank(ipu_crtc->imx_crtc);
-
-	if (ipu_crtc->flip_state == IPU_FLIP_SUBMITTED) {
-		struct ipu_plane *plane = ipu_crtc->plane[0];
-
-		ipu_plane_set_base(plane, ipu_crtc->base.primary->fb,
-				   plane->x, plane->y);
-		ipu_crtc_handle_pageflip(ipu_crtc);
-		queue_work(ipu_crtc->flip_queue,
-			   &ipu_crtc->flip_work->unref_work);
-		ipu_crtc->flip_state = IPU_FLIP_NONE;
-	}
-
-	return IRQ_HANDLED;
-}
-
-static bool ipu_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-	struct videomode vm;
-	int ret;
-
-	drm_display_mode_to_videomode(adjusted_mode, &vm);
-
-	ret = ipu_di_adjust_videomode(ipu_crtc->di, &vm);
-	if (ret)
-		return false;
-
-	drm_display_mode_from_videomode(&vm, adjusted_mode);
-
-	return true;
-}
-
-static void ipu_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_fb_disable(ipu_crtc);
-}
-
-static void ipu_crtc_commit(struct drm_crtc *crtc)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_fb_enable(ipu_crtc);
+	ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di,
+			 mode->flags & DRM_MODE_FLAG_INTERLACE,
+			 imx_crtc_state->bus_format, mode->hdisplay);
+	ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg);
 }
 
 static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
-	.dpms = ipu_crtc_dpms,
 	.mode_fixup = ipu_crtc_mode_fixup,
-	.mode_set = ipu_crtc_mode_set,
-	.prepare = ipu_crtc_prepare,
-	.commit = ipu_crtc_commit,
+	.mode_set_nofb = ipu_crtc_mode_set_nofb,
+	.atomic_check = ipu_crtc_atomic_check,
+	.atomic_begin = ipu_crtc_atomic_begin,
+	.disable = ipu_crtc_disable,
+	.enable = ipu_crtc_enable,
 };
 
 static int ipu_enable_vblank(struct drm_crtc *crtc)
@@ -398,23 +262,9 @@ static void ipu_disable_vblank(struct drm_crtc *crtc)
 	disable_irq_nosync(ipu_crtc->irq);
 }
 
-static int ipu_set_interface_pix_fmt(struct drm_crtc *crtc,
-		u32 bus_format, int hsync_pin, int vsync_pin, u32 bus_flags)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_crtc->bus_format = bus_format;
-	ipu_crtc->bus_flags = bus_flags;
-	ipu_crtc->di_hsync_pin = hsync_pin;
-	ipu_crtc->di_vsync_pin = vsync_pin;
-
-	return 0;
-}
-
 static const struct imx_drm_crtc_helper_funcs ipu_crtc_helper_funcs = {
 	.enable_vblank = ipu_enable_vblank,
 	.disable_vblank = ipu_disable_vblank,
-	.set_interface_pix_fmt = ipu_set_interface_pix_fmt,
 	.crtc_funcs = &ipu_crtc_funcs,
 	.crtc_helper_funcs = &ipu_helper_funcs,
 };
@@ -496,8 +346,16 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
 						IPU_DP_FLOW_SYNC_FG,
 						drm_crtc_mask(&ipu_crtc->base),
 						DRM_PLANE_TYPE_OVERLAY);
-		if (IS_ERR(ipu_crtc->plane[1]))
+		if (IS_ERR(ipu_crtc->plane[1])) {
 			ipu_crtc->plane[1] = NULL;
+		} else {
+			ret = ipu_plane_get_resources(ipu_crtc->plane[1]);
+			if (ret) {
+				dev_err(ipu_crtc->dev, "getting plane 1 "
+					"resources failed with %d.\n", ret);
+				goto err_put_plane0_res;
+			}
+		}
 	}
 
 	ipu_crtc->irq = ipu_plane_irq(ipu_crtc->plane[0]);
@@ -505,16 +363,17 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
 			"imx_drm", ipu_crtc);
 	if (ret < 0) {
 		dev_err(ipu_crtc->dev, "irq request failed with %d.\n", ret);
-		goto err_put_plane_res;
+		goto err_put_plane1_res;
 	}
 	/* Only enable IRQ when we actually need it to trigger work. */
 	disable_irq(ipu_crtc->irq);
 
-	ipu_crtc->flip_queue = create_singlethread_workqueue("ipu-crtc-flip");
-
 	return 0;
 
-err_put_plane_res:
+err_put_plane1_res:
+	if (ipu_crtc->plane[1])
+		ipu_plane_put_resources(ipu_crtc->plane[1]);
+err_put_plane0_res:
 	ipu_plane_put_resources(ipu_crtc->plane[0]);
 err_remove_crtc:
 	imx_drm_remove_crtc(ipu_crtc->imx_crtc);
@@ -553,9 +412,10 @@ static void ipu_drm_unbind(struct device *dev, struct device *master,
 
 	imx_drm_remove_crtc(ipu_crtc->imx_crtc);
 
-	destroy_workqueue(ipu_crtc->flip_queue);
-	ipu_plane_put_resources(ipu_crtc->plane[0]);
 	ipu_put_resources(ipu_crtc);
+	if (ipu_crtc->plane[1])
+		ipu_plane_put_resources(ipu_crtc->plane[1]);
+	ipu_plane_put_resources(ipu_crtc->plane[0]);
 }
 
 static const struct component_ops ipu_crtc_ops = {
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index a4bb44118d33..4ad67d015ec7 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -14,13 +14,19 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_plane_helper.h>
 
 #include "video/imx-ipu-v3.h"
 #include "ipuv3-plane.h"
 
-#define to_ipu_plane(x)	container_of(x, struct ipu_plane, base)
+static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p)
+{
+	return container_of(p, struct ipu_plane, base);
+}
 
 static const uint32_t ipu_plane_formats[] = {
 	DRM_FORMAT_ARGB1555,
@@ -53,62 +59,67 @@ int ipu_plane_irq(struct ipu_plane *ipu_plane)
 				     IPU_IRQ_EOF);
 }
 
-static int calc_vref(struct drm_display_mode *mode)
+static inline unsigned long
+drm_plane_state_to_eba(struct drm_plane_state *state)
 {
-	unsigned long htotal, vtotal;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
 
-	htotal = mode->htotal;
-	vtotal = mode->vtotal;
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	BUG_ON(!cma_obj);
 
-	if (!htotal || !vtotal)
-		return 60;
-
-	return DIV_ROUND_UP(mode->clock * 1000, vtotal * htotal);
+	return cma_obj->paddr + fb->offsets[0] +
+	       fb->pitches[0] * (state->src_y >> 16) +
+	       (fb->bits_per_pixel >> 3) * (state->src_x >> 16);
 }
 
-static inline int calc_bandwidth(int width, int height, unsigned int vref)
+static inline unsigned long
+drm_plane_state_to_ubo(struct drm_plane_state *state)
 {
-	return width * height * vref;
-}
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
+	unsigned long eba = drm_plane_state_to_eba(state);
 
-int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
-		       int x, int y)
-{
-	struct drm_gem_cma_object *cma_obj[3];
-	unsigned long eba, ubo, vbo;
-	int active, i;
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 1);
+	BUG_ON(!cma_obj);
 
-	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
-		cma_obj[i] = drm_fb_cma_get_gem_obj(fb, i);
-		if (!cma_obj[i]) {
-			DRM_DEBUG_KMS("plane %d entry is null.\n", i);
-			return -EFAULT;
-		}
-	}
+	return cma_obj->paddr + fb->offsets[1] +
+	       fb->pitches[1] * (state->src_y >> 16) / 2 +
+	       (state->src_x >> 16) / 2 - eba;
+}
 
-	eba = cma_obj[0]->paddr + fb->offsets[0] +
-	      fb->pitches[0] * y + (fb->bits_per_pixel >> 3) * x;
+static inline unsigned long
+drm_plane_state_to_vbo(struct drm_plane_state *state)
+{
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
+	unsigned long eba = drm_plane_state_to_eba(state);
 
-	if (eba & 0x7) {
-		DRM_DEBUG_KMS("base address must be a multiple of 8.\n");
-		return -EINVAL;
-	}
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 2);
+	BUG_ON(!cma_obj);
 
-	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384) {
-		DRM_DEBUG_KMS("pitches out of range.\n");
-		return -EINVAL;
-	}
+	return cma_obj->paddr + fb->offsets[2] +
+	       fb->pitches[2] * (state->src_y >> 16) / 2 +
+	       (state->src_x >> 16) / 2 - eba;
+}
 
-	if (ipu_plane->enabled && fb->pitches[0] != ipu_plane->stride[0]) {
-		DRM_DEBUG_KMS("pitches must not change while plane is enabled.\n");
-		return -EINVAL;
-	}
+static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane,
+				      struct drm_plane_state *old_state)
+{
+	struct drm_plane *plane = &ipu_plane->base;
+	struct drm_plane_state *state = plane->state;
+	struct drm_framebuffer *fb = state->fb;
+	unsigned long eba, ubo, vbo;
+	int active;
 
-	ipu_plane->stride[0] = fb->pitches[0];
+	eba = drm_plane_state_to_eba(state);
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_YUV420:
 	case DRM_FORMAT_YVU420:
+		if (old_state->fb)
+			break;
+
 		/*
 		 * Multiplanar formats have to meet the following restrictions:
 		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
@@ -117,59 +128,28 @@ int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		 * - Only EBA may be changed while scanout is active
 		 * - The strides of U and V planes must be identical.
 		 */
-		ubo = cma_obj[1]->paddr + fb->offsets[1] +
-		      fb->pitches[1] * y / 2 + x / 2 - eba;
-		vbo = cma_obj[2]->paddr + fb->offsets[2] +
-		      fb->pitches[2] * y / 2 + x / 2 - eba;
+		ubo = drm_plane_state_to_ubo(state);
+		vbo = drm_plane_state_to_vbo(state);
 
-		if ((ubo & 0x7) || (vbo & 0x7)) {
-			DRM_DEBUG_KMS("U/V buffer offsets must be a multiple of 8.\n");
-			return -EINVAL;
-		}
-
-		if ((ubo > 0xfffff8) || (vbo > 0xfffff8)) {
-			DRM_DEBUG_KMS("U/V buffer offsets must be positive and not larger than 0xfffff8.\n");
-			return -EINVAL;
-		}
-
-		if (ipu_plane->enabled && ((ipu_plane->u_offset != ubo) ||
-					   (ipu_plane->v_offset != vbo))) {
-			DRM_DEBUG_KMS("U/V buffer offsets must not change while plane is enabled.\n");
-			return -EINVAL;
-		}
-
-		if (fb->pitches[1] != fb->pitches[2]) {
-			DRM_DEBUG_KMS("U/V pitches must be identical.\n");
-			return -EINVAL;
-		}
-
-		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384) {
-			DRM_DEBUG_KMS("U/V pitches out of range.\n");
-			return -EINVAL;
-		}
-
-		if (ipu_plane->enabled &&
-		    (ipu_plane->stride[1] != fb->pitches[1])) {
-			DRM_DEBUG_KMS("U/V pitches must not change while plane is enabled.\n");
-			return -EINVAL;
-		}
-
-		ipu_plane->u_offset = ubo;
-		ipu_plane->v_offset = vbo;
-		ipu_plane->stride[1] = fb->pitches[1];
+		if (fb->pixel_format == DRM_FORMAT_YUV420)
+			ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+						      fb->pitches[1], ubo, vbo);
+		else
+			ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+						      fb->pitches[1], vbo, ubo);
 
 		dev_dbg(ipu_plane->base.dev->dev,
-			"phys = %pad %pad %pad, x = %d, y = %d",
-			&cma_obj[0]->paddr, &cma_obj[1]->paddr,
-			&cma_obj[2]->paddr, x, y);
+			"phy = %lu %lu %lu, x = %d, y = %d", eba, ubo, vbo,
+			state->src_x >> 16, state->src_y >> 16);
 		break;
 	default:
-		dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
-			&cma_obj[0]->paddr, x, y);
+		dev_dbg(ipu_plane->base.dev->dev, "phys = %lu, x = %d, y = %d",
+			eba, state->src_x >> 16, state->src_y >> 16);
+
 		break;
 	}
 
-	if (ipu_plane->enabled) {
+	if (old_state->fb) {
 		active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba);
 		ipu_idmac_select_buffer(ipu_plane->ipu_ch, !active);
@@ -177,155 +157,6 @@ int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, 0, eba);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, 1, eba);
 	}
-
-	/* cache offsets for subsequent pageflips */
-	ipu_plane->x = x;
-	ipu_plane->y = y;
-
-	return 0;
-}
-
-int ipu_plane_mode_set(struct ipu_plane *ipu_plane, struct drm_crtc *crtc,
-		       struct drm_display_mode *mode,
-		       struct drm_framebuffer *fb, int crtc_x, int crtc_y,
-		       unsigned int crtc_w, unsigned int crtc_h,
-		       uint32_t src_x, uint32_t src_y,
-		       uint32_t src_w, uint32_t src_h, bool interlaced)
-{
-	struct device *dev = ipu_plane->base.dev->dev;
-	int ret;
-
-	/* no scaling */
-	if (src_w != crtc_w || src_h != crtc_h)
-		return -EINVAL;
-
-	/* clip to crtc bounds */
-	if (crtc_x < 0) {
-		if (-crtc_x > crtc_w)
-			return -EINVAL;
-		src_x += -crtc_x;
-		src_w -= -crtc_x;
-		crtc_w -= -crtc_x;
-		crtc_x = 0;
-	}
-	if (crtc_y < 0) {
-		if (-crtc_y > crtc_h)
-			return -EINVAL;
-		src_y += -crtc_y;
-		src_h -= -crtc_y;
-		crtc_h -= -crtc_y;
-		crtc_y = 0;
-	}
-	if (crtc_x + crtc_w > mode->hdisplay) {
-		if (crtc_x > mode->hdisplay)
-			return -EINVAL;
-		crtc_w = mode->hdisplay - crtc_x;
-		src_w = crtc_w;
-	}
-	if (crtc_y + crtc_h > mode->vdisplay) {
-		if (crtc_y > mode->vdisplay)
-			return -EINVAL;
-		crtc_h = mode->vdisplay - crtc_y;
-		src_h = crtc_h;
-	}
-	/* full plane minimum width is 13 pixels */
-	if (crtc_w < 13 && (ipu_plane->dp_flow != IPU_DP_FLOW_SYNC_FG))
-		return -EINVAL;
-	if (crtc_h < 2)
-		return -EINVAL;
-
-	/*
-	 * since we cannot touch active IDMAC channels, we do not support
-	 * resizing the enabled plane or changing its format
-	 */
-	if (ipu_plane->enabled) {
-		if (src_w != ipu_plane->w || src_h != ipu_plane->h ||
-		    fb->pixel_format != ipu_plane->base.fb->pixel_format)
-			return -EINVAL;
-
-		return ipu_plane_set_base(ipu_plane, fb, src_x, src_y);
-	}
-
-	switch (ipu_plane->dp_flow) {
-	case IPU_DP_FLOW_SYNC_BG:
-		ret = ipu_dp_setup_channel(ipu_plane->dp,
-				IPUV3_COLORSPACE_RGB,
-				IPUV3_COLORSPACE_RGB);
-		if (ret) {
-			dev_err(dev,
-				"initializing display processor failed with %d\n",
-				ret);
-			return ret;
-		}
-		ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
-		break;
-	case IPU_DP_FLOW_SYNC_FG:
-		ipu_dp_setup_channel(ipu_plane->dp,
-				ipu_drm_fourcc_to_colorspace(fb->pixel_format),
-				IPUV3_COLORSPACE_UNKNOWN);
-		ipu_dp_set_window_pos(ipu_plane->dp, crtc_x, crtc_y);
-		/* Enable local alpha on partial plane */
-		switch (fb->pixel_format) {
-		case DRM_FORMAT_ARGB1555:
-		case DRM_FORMAT_ABGR1555:
-		case DRM_FORMAT_RGBA5551:
-		case DRM_FORMAT_BGRA5551:
-		case DRM_FORMAT_ARGB4444:
-		case DRM_FORMAT_ARGB8888:
-		case DRM_FORMAT_ABGR8888:
-		case DRM_FORMAT_RGBA8888:
-		case DRM_FORMAT_BGRA8888:
-			ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false);
-			break;
-		default:
-			break;
-		}
-	}
-
-	ret = ipu_dmfc_alloc_bandwidth(ipu_plane->dmfc,
-			calc_bandwidth(crtc_w, crtc_h,
-				       calc_vref(mode)), 64);
-	if (ret) {
-		dev_err(dev, "allocating dmfc bandwidth failed with %d\n", ret);
-		return ret;
-	}
-
-	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, crtc_w);
-
-	ipu_cpmem_zero(ipu_plane->ipu_ch);
-	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, src_w, src_h);
-	ret = ipu_cpmem_set_fmt(ipu_plane->ipu_ch, fb->pixel_format);
-	if (ret < 0) {
-		dev_err(dev, "unsupported pixel format 0x%08x\n",
-			fb->pixel_format);
-		return ret;
-	}
-	ipu_cpmem_set_high_priority(ipu_plane->ipu_ch);
-	ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1);
-	ipu_cpmem_set_stride(ipu_plane->ipu_ch, fb->pitches[0]);
-
-	ret = ipu_plane_set_base(ipu_plane, fb, src_x, src_y);
-	if (ret < 0)
-		return ret;
-	if (interlaced)
-		ipu_cpmem_interlaced_scan(ipu_plane->ipu_ch, fb->pitches[0]);
-
-	if (fb->pixel_format == DRM_FORMAT_YUV420) {
-		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
-					      ipu_plane->stride[1],
-					      ipu_plane->u_offset,
-					      ipu_plane->v_offset);
-	} else if (fb->pixel_format == DRM_FORMAT_YVU420) {
-		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
-					      ipu_plane->stride[1],
-					      ipu_plane->v_offset,
-					      ipu_plane->u_offset);
-	}
-
-	ipu_plane->w = src_w;
-	ipu_plane->h = src_h;
-
-	return 0;
 }
 
 void ipu_plane_put_resources(struct ipu_plane *ipu_plane)
@@ -372,7 +203,7 @@ err_out:
 	return ret;
 }
 
-void ipu_plane_enable(struct ipu_plane *ipu_plane)
+static void ipu_plane_enable(struct ipu_plane *ipu_plane)
 {
 	if (ipu_plane->dp)
 		ipu_dp_enable(ipu_plane->ipu);
@@ -380,14 +211,10 @@ void ipu_plane_enable(struct ipu_plane *ipu_plane)
 	ipu_idmac_enable_channel(ipu_plane->ipu_ch);
 	if (ipu_plane->dp)
 		ipu_dp_enable_channel(ipu_plane->dp);
-
-	ipu_plane->enabled = true;
 }
 
-void ipu_plane_disable(struct ipu_plane *ipu_plane)
+static void ipu_plane_disable(struct ipu_plane *ipu_plane)
 {
-	ipu_plane->enabled = false;
-
 	ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50);
 
 	if (ipu_plane->dp)
@@ -398,74 +225,225 @@ void ipu_plane_disable(struct ipu_plane *ipu_plane)
 		ipu_dp_disable(ipu_plane->ipu);
 }
 
-/*
- * drm_plane API
- */
-
-static int ipu_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
-			    struct drm_framebuffer *fb, int crtc_x, int crtc_y,
-			    unsigned int crtc_w, unsigned int crtc_h,
-			    uint32_t src_x, uint32_t src_y,
-			    uint32_t src_w, uint32_t src_h)
+static int ipu_disable_plane(struct drm_plane *plane)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
-	int ret = 0;
-
-	DRM_DEBUG_KMS("plane - %p\n", plane);
-
-	if (!ipu_plane->enabled)
-		ret = ipu_plane_get_resources(ipu_plane);
-	if (ret < 0)
-		return ret;
-
-	ret = ipu_plane_mode_set(ipu_plane, crtc, &crtc->hwmode, fb,
-			crtc_x, crtc_y, crtc_w, crtc_h,
-			src_x >> 16, src_y >> 16, src_w >> 16, src_h >> 16,
-			false);
-	if (ret < 0) {
-		ipu_plane_put_resources(ipu_plane);
-		return ret;
-	}
 
-	if (crtc != plane->crtc)
-		dev_dbg(plane->dev->dev, "crtc change: %p -> %p\n",
-				plane->crtc, crtc);
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
 
-	if (!ipu_plane->enabled)
-		ipu_plane_enable(ipu_plane);
+	ipu_plane_disable(ipu_plane);
 
 	return 0;
 }
 
-static int ipu_disable_plane(struct drm_plane *plane)
+static void ipu_plane_destroy(struct drm_plane *plane)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
 
 	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
 
-	if (ipu_plane->enabled)
-		ipu_plane_disable(ipu_plane);
+	ipu_disable_plane(plane);
+	drm_plane_cleanup(plane);
+	kfree(ipu_plane);
+}
 
-	ipu_plane_put_resources(ipu_plane);
+static const struct drm_plane_funcs ipu_plane_funcs = {
+	.update_plane	= drm_atomic_helper_update_plane,
+	.disable_plane	= drm_atomic_helper_disable_plane,
+	.destroy	= ipu_plane_destroy,
+	.reset		= drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+};
+
+static int ipu_plane_atomic_check(struct drm_plane *plane,
+				  struct drm_plane_state *state)
+{
+	struct drm_plane_state *old_state = plane->state;
+	struct drm_crtc_state *crtc_state;
+	struct device *dev = plane->dev->dev;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_framebuffer *old_fb = old_state->fb;
+	unsigned long eba, ubo, vbo, old_ubo, old_vbo;
+
+	/* Ok to disable */
+	if (!fb)
+		return 0;
+
+	if (!state->crtc)
+		return -EINVAL;
+
+	crtc_state =
+		drm_atomic_get_existing_crtc_state(state->state, state->crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
+	/* CRTC should be enabled */
+	if (!crtc_state->enable)
+		return -EINVAL;
+
+	/* no scaling */
+	if (state->src_w >> 16 != state->crtc_w ||
+	    state->src_h >> 16 != state->crtc_h)
+		return -EINVAL;
+
+	switch (plane->type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		/* full plane doesn't support partial off screen */
+		if (state->crtc_x || state->crtc_y ||
+		    state->crtc_w != crtc_state->adjusted_mode.hdisplay ||
+		    state->crtc_h != crtc_state->adjusted_mode.vdisplay)
+			return -EINVAL;
+
+		/* full plane minimum width is 13 pixels */
+		if (state->crtc_w < 13)
+			return -EINVAL;
+		break;
+	case DRM_PLANE_TYPE_OVERLAY:
+		if (state->crtc_x < 0 || state->crtc_y < 0)
+			return -EINVAL;
+
+		if (state->crtc_x + state->crtc_w >
+		    crtc_state->adjusted_mode.hdisplay)
+			return -EINVAL;
+		if (state->crtc_y + state->crtc_h >
+		    crtc_state->adjusted_mode.vdisplay)
+			return -EINVAL;
+		break;
+	default:
+		dev_warn(dev, "Unsupported plane type\n");
+		return -EINVAL;
+	}
+
+	if (state->crtc_h < 2)
+		return -EINVAL;
+
+	/*
+	 * since we cannot touch active IDMAC channels, we do not support
+	 * resizing the enabled plane or changing its format
+	 */
+	if (old_fb && (state->src_w != old_state->src_w ||
+			      state->src_h != old_state->src_h ||
+			      fb->pixel_format != old_fb->pixel_format))
+		return -EINVAL;
+
+	eba = drm_plane_state_to_eba(state);
+
+	if (eba & 0x7)
+		return -EINVAL;
+
+	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384)
+		return -EINVAL;
+
+	if (old_fb && fb->pitches[0] != old_fb->pitches[0])
+		return -EINVAL;
+
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		/*
+		 * Multiplanar formats have to meet the following restrictions:
+		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
+		 * - EBA, UBO and VBO are a multiple of 8
+		 * - UBO and VBO are unsigned and not larger than 0xfffff8
+		 * - Only EBA may be changed while scanout is active
+		 * - The strides of U and V planes must be identical.
+		 */
+		ubo = drm_plane_state_to_ubo(state);
+		vbo = drm_plane_state_to_vbo(state);
+
+		if ((ubo & 0x7) || (vbo & 0x7))
+			return -EINVAL;
+
+		if ((ubo > 0xfffff8) || (vbo > 0xfffff8))
+			return -EINVAL;
+
+		if (old_fb) {
+			old_ubo = drm_plane_state_to_ubo(old_state);
+			old_vbo = drm_plane_state_to_vbo(old_state);
+			if (ubo != old_ubo || vbo != old_vbo)
+				return -EINVAL;
+		}
+
+		if (fb->pitches[1] != fb->pitches[2])
+			return -EINVAL;
+
+		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384)
+			return -EINVAL;
+
+		if (old_fb && old_fb->pitches[1] != fb->pitches[1])
+			return -EINVAL;
+	}
 
 	return 0;
 }
 
-static void ipu_plane_destroy(struct drm_plane *plane)
+static void ipu_plane_atomic_disable(struct drm_plane *plane,
+				     struct drm_plane_state *old_state)
+{
+	ipu_disable_plane(plane);
+}
+
+static void ipu_plane_atomic_update(struct drm_plane *plane,
+				    struct drm_plane_state *old_state)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
+	struct drm_plane_state *state = plane->state;
+	enum ipu_color_space ics;
 
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+	if (old_state->fb) {
+		ipu_plane_atomic_set_base(ipu_plane, old_state);
+		return;
+	}
 
-	ipu_disable_plane(plane);
-	drm_plane_cleanup(plane);
-	kfree(ipu_plane);
+	switch (ipu_plane->dp_flow) {
+	case IPU_DP_FLOW_SYNC_BG:
+		ipu_dp_setup_channel(ipu_plane->dp,
+					IPUV3_COLORSPACE_RGB,
+					IPUV3_COLORSPACE_RGB);
+		ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
+		break;
+	case IPU_DP_FLOW_SYNC_FG:
+		ics = ipu_drm_fourcc_to_colorspace(state->fb->pixel_format);
+		ipu_dp_setup_channel(ipu_plane->dp, ics,
+					IPUV3_COLORSPACE_UNKNOWN);
+		ipu_dp_set_window_pos(ipu_plane->dp, state->crtc_x,
+					state->crtc_y);
+		/* Enable local alpha on partial plane */
+		switch (state->fb->pixel_format) {
+		case DRM_FORMAT_ARGB1555:
+		case DRM_FORMAT_ABGR1555:
+		case DRM_FORMAT_RGBA5551:
+		case DRM_FORMAT_BGRA5551:
+		case DRM_FORMAT_ARGB4444:
+		case DRM_FORMAT_ARGB8888:
+		case DRM_FORMAT_ABGR8888:
+		case DRM_FORMAT_RGBA8888:
+		case DRM_FORMAT_BGRA8888:
+			ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false);
+			break;
+		default:
+			break;
+		}
+	}
+
+	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, state->crtc_w);
+
+	ipu_cpmem_zero(ipu_plane->ipu_ch);
+	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, state->src_w >> 16,
+					state->src_h >> 16);
+	ipu_cpmem_set_fmt(ipu_plane->ipu_ch, state->fb->pixel_format);
+	ipu_cpmem_set_high_priority(ipu_plane->ipu_ch);
+	ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1);
+	ipu_cpmem_set_stride(ipu_plane->ipu_ch, state->fb->pitches[0]);
+	ipu_plane_atomic_set_base(ipu_plane, old_state);
+	ipu_plane_enable(ipu_plane);
 }
 
-static const struct drm_plane_funcs ipu_plane_funcs = {
-	.update_plane	= ipu_update_plane,
-	.disable_plane	= ipu_disable_plane,
-	.destroy	= ipu_plane_destroy,
+static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = {
+	.atomic_check = ipu_plane_atomic_check,
+	.atomic_disable = ipu_plane_atomic_disable,
+	.atomic_update = ipu_plane_atomic_update,
 };
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
@@ -498,5 +476,7 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 		return ERR_PTR(ret);
 	}
 
+	drm_plane_helper_add(&ipu_plane->base, &ipu_plane_helper_funcs);
+
 	return ipu_plane;
 }
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 4448fd4ad4eb..338b88a74eb6 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h
@@ -23,17 +23,6 @@ struct ipu_plane {
 
 	int			dma;
 	int			dp_flow;
-
-	int			x;
-	int			y;
-	int			w;
-	int			h;
-
-	unsigned int		u_offset;
-	unsigned int		v_offset;
-	unsigned int		stride[2];
-
-	bool			enabled;
 };
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
@@ -48,11 +37,6 @@ int ipu_plane_mode_set(struct ipu_plane *plane, struct drm_crtc *crtc,
 		       uint32_t src_x, uint32_t src_y, uint32_t src_w,
 		       uint32_t src_h, bool interlaced);
 
-void ipu_plane_enable(struct ipu_plane *plane);
-void ipu_plane_disable(struct ipu_plane *plane);
-int ipu_plane_set_base(struct ipu_plane *plane, struct drm_framebuffer *fb,
-		       int x, int y);
-
 int ipu_plane_get_resources(struct ipu_plane *plane);
 void ipu_plane_put_resources(struct ipu_plane *plane);
 
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 2d1fd02cd3d6..1dad297b01fd 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -16,6 +16,7 @@
 #include <linux/component.h>
 #include <linux/module.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_panel.h>
@@ -25,9 +26,6 @@
 
 #include "imx-drm.h"
 
-#define con_to_imxpd(x) container_of(x, struct imx_parallel_display, connector)
-#define enc_to_imxpd(x) container_of(x, struct imx_parallel_display, encoder)
-
 struct imx_parallel_display {
 	struct drm_connector connector;
 	struct drm_encoder encoder;
@@ -37,8 +35,19 @@ struct imx_parallel_display {
 	u32 bus_format;
 	struct drm_display_mode mode;
 	struct drm_panel *panel;
+	struct drm_bridge *bridge;
 };
 
+static inline struct imx_parallel_display *con_to_imxpd(struct drm_connector *c)
+{
+	return container_of(c, struct imx_parallel_display, connector);
+}
+
+static inline struct imx_parallel_display *enc_to_imxpd(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_parallel_display, encoder);
+}
+
 static enum drm_connector_status imx_pd_connector_detect(
 		struct drm_connector *connector, bool force)
 {
@@ -53,11 +62,7 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
 
 	if (imxpd->panel && imxpd->panel->funcs &&
 	    imxpd->panel->funcs->get_modes) {
-		struct drm_display_info *di = &connector->display_info;
-
 		num_modes = imxpd->panel->funcs->get_modes(imxpd->panel);
-		if (!imxpd->bus_format && di->num_bus_formats)
-			imxpd->bus_format = di->bus_formats[0];
 		if (num_modes > 0)
 			return num_modes;
 	}
@@ -69,10 +74,16 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
 
 	if (np) {
 		struct drm_display_mode *mode = drm_mode_create(connector->dev);
+		int ret;
 
 		if (!mode)
 			return -EINVAL;
-		of_get_drm_display_mode(np, &imxpd->mode, OF_USE_NATIVE_MODE);
+
+		ret = of_get_drm_display_mode(np, &imxpd->mode,
+					      OF_USE_NATIVE_MODE);
+		if (ret)
+			return ret;
+
 		drm_mode_copy(mode, &imxpd->mode);
 		mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
 		drm_mode_probed_add(connector, mode);
@@ -90,24 +101,7 @@ static struct drm_encoder *imx_pd_connector_best_encoder(
 	return &imxpd->encoder;
 }
 
-static void imx_pd_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
-
-	if (mode != DRM_MODE_DPMS_ON)
-		drm_panel_disable(imxpd->panel);
-	else
-		drm_panel_enable(imxpd->panel);
-}
-
-static void imx_pd_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
-	imx_drm_set_bus_config(encoder, imxpd->bus_format, 2, 3,
-			       imxpd->connector.display_info.bus_flags);
-}
-
-static void imx_pd_encoder_commit(struct drm_encoder *encoder)
+static void imx_pd_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
 
@@ -115,12 +109,6 @@ static void imx_pd_encoder_commit(struct drm_encoder *encoder)
 	drm_panel_enable(imxpd->panel);
 }
 
-static void imx_pd_encoder_mode_set(struct drm_encoder *encoder,
-			 struct drm_display_mode *orig_mode,
-			 struct drm_display_mode *mode)
-{
-}
-
 static void imx_pd_encoder_disable(struct drm_encoder *encoder)
 {
 	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
@@ -129,11 +117,33 @@ static void imx_pd_encoder_disable(struct drm_encoder *encoder)
 	drm_panel_unprepare(imxpd->panel);
 }
 
+static int imx_pd_encoder_atomic_check(struct drm_encoder *encoder,
+				       struct drm_crtc_state *crtc_state,
+				       struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct drm_display_info *di = &conn_state->connector->display_info;
+	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
+
+	imx_crtc_state->bus_flags = di->bus_flags;
+	if (!imxpd->bus_format && di->num_bus_formats)
+		imx_crtc_state->bus_format = di->bus_formats[0];
+	else
+		imx_crtc_state->bus_format = imxpd->bus_format;
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
+}
+
 static const struct drm_connector_funcs imx_pd_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_pd_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = {
@@ -146,20 +156,18 @@ static const struct drm_encoder_funcs imx_pd_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_pd_encoder_helper_funcs = {
-	.dpms = imx_pd_encoder_dpms,
-	.prepare = imx_pd_encoder_prepare,
-	.commit = imx_pd_encoder_commit,
-	.mode_set = imx_pd_encoder_mode_set,
+	.enable = imx_pd_encoder_enable,
 	.disable = imx_pd_encoder_disable,
+	.atomic_check = imx_pd_encoder_atomic_check,
 };
 
 static int imx_pd_register(struct drm_device *drm,
 	struct imx_parallel_display *imxpd)
 {
+	struct drm_encoder *encoder = &imxpd->encoder;
 	int ret;
 
-	ret = imx_drm_encoder_parse_of(drm, &imxpd->encoder,
-				       imxpd->dev->of_node);
+	ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node);
 	if (ret)
 		return ret;
 
@@ -170,19 +178,33 @@ static int imx_pd_register(struct drm_device *drm,
 	 */
 	imxpd->connector.dpms = DRM_MODE_DPMS_OFF;
 
-	drm_encoder_helper_add(&imxpd->encoder, &imx_pd_encoder_helper_funcs);
-	drm_encoder_init(drm, &imxpd->encoder, &imx_pd_encoder_funcs,
+	drm_encoder_helper_add(encoder, &imx_pd_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &imx_pd_encoder_funcs,
 			 DRM_MODE_ENCODER_NONE, NULL);
 
-	drm_connector_helper_add(&imxpd->connector,
-			&imx_pd_connector_helper_funcs);
-	drm_connector_init(drm, &imxpd->connector, &imx_pd_connector_funcs,
-			   DRM_MODE_CONNECTOR_VGA);
+	if (!imxpd->bridge) {
+		drm_connector_helper_add(&imxpd->connector,
+				&imx_pd_connector_helper_funcs);
+		drm_connector_init(drm, &imxpd->connector,
+				   &imx_pd_connector_funcs,
+				   DRM_MODE_CONNECTOR_VGA);
+	}
 
 	if (imxpd->panel)
 		drm_panel_attach(imxpd->panel, &imxpd->connector);
 
-	drm_mode_connector_attach_encoder(&imxpd->connector, &imxpd->encoder);
+	if (imxpd->bridge) {
+		imxpd->bridge->encoder = encoder;
+		encoder->bridge = imxpd->bridge;
+		ret = drm_bridge_attach(drm, imxpd->bridge);
+		if (ret < 0) {
+			dev_err(imxpd->dev, "failed to attach bridge: %d\n",
+				ret);
+			return ret;
+		}
+	} else {
+		drm_mode_connector_attach_encoder(&imxpd->connector, encoder);
+	}
 
 	return 0;
 }
@@ -195,6 +217,7 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 	const u8 *edidp;
 	struct imx_parallel_display *imxpd;
 	int ret;
+	u32 bus_format = 0;
 	const char *fmt;
 
 	imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL);
@@ -208,14 +231,15 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 	ret = of_property_read_string(np, "interface-pix-fmt", &fmt);
 	if (!ret) {
 		if (!strcmp(fmt, "rgb24"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+			bus_format = MEDIA_BUS_FMT_RGB888_1X24;
 		else if (!strcmp(fmt, "rgb565"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB565_1X16;
+			bus_format = MEDIA_BUS_FMT_RGB565_1X16;
 		else if (!strcmp(fmt, "bgr666"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
+			bus_format = MEDIA_BUS_FMT_RGB666_1X18;
 		else if (!strcmp(fmt, "lvds666"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
+			bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
 	}
+	imxpd->bus_format = bus_format;
 
 	/* port@1 is the output port */
 	ep = of_graph_get_endpoint_by_regs(np, 1, -1);
@@ -223,13 +247,30 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 		struct device_node *remote;
 
 		remote = of_graph_get_remote_port_parent(ep);
+		if (!remote) {
+			dev_warn(dev, "endpoint %s not connected\n",
+				 ep->full_name);
+			of_node_put(ep);
+			return -ENODEV;
+		}
 		of_node_put(ep);
-		if (remote) {
-			imxpd->panel = of_drm_find_panel(remote);
-			of_node_put(remote);
+
+		imxpd->panel = of_drm_find_panel(remote);
+		if (imxpd->panel) {
+			dev_dbg(dev, "found panel %s\n", remote->full_name);
+		} else {
+			imxpd->bridge = of_drm_find_bridge(remote);
+			if (imxpd->bridge)
+				dev_dbg(dev, "found bridge %s\n",
+					remote->full_name);
 		}
-		if (!imxpd->panel)
+		if (!imxpd->panel && !imxpd->bridge) {
+			dev_dbg(dev, "waiting for panel or bridge %s\n",
+				remote->full_name);
+			of_node_put(remote);
 			return -EPROBE_DEFER;
+		}
+		of_node_put(remote);
 	}
 
 	imxpd->dev = dev;
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index ba812ef2c9d1..334562d06731 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1535,7 +1535,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
  * HDMI audio codec callbacks
  */
 
-static int mtk_hdmi_audio_hw_params(struct device *dev,
+static int mtk_hdmi_audio_hw_params(struct device *dev, void *data,
 				    struct hdmi_codec_daifmt *daifmt,
 				    struct hdmi_codec_params *params)
 {
@@ -1604,7 +1604,7 @@ static int mtk_hdmi_audio_hw_params(struct device *dev,
 	return 0;
 }
 
-static int mtk_hdmi_audio_startup(struct device *dev)
+static int mtk_hdmi_audio_startup(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1615,7 +1615,7 @@ static int mtk_hdmi_audio_startup(struct device *dev)
 	return 0;
 }
 
-static void mtk_hdmi_audio_shutdown(struct device *dev)
+static void mtk_hdmi_audio_shutdown(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1624,7 +1624,7 @@ static void mtk_hdmi_audio_shutdown(struct device *dev)
 	mtk_hdmi_audio_disable(hdmi);
 }
 
-int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
+int mtk_hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1638,7 +1638,7 @@ int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
 	return 0;
 }
 
-static int mtk_hdmi_audio_get_eld(struct device *dev, uint8_t *buf, size_t len)
+static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
index cf8f38d39e10..1c366f8cb2d0 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
@@ -431,7 +431,7 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
 	phy_set_drvdata(phy, mipi_tx);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-	if (IS_ERR(phy)) {
+	if (IS_ERR(phy_provider)) {
 		ret = PTR_ERR(phy_provider);
 		return ret;
 	}
diff --git a/drivers/gpu/drm/mgag200/Kconfig b/drivers/gpu/drm/mgag200/Kconfig
index 3a1c5fbae54a..520e5e668d6c 100644
--- a/drivers/gpu/drm/mgag200/Kconfig
+++ b/drivers/gpu/drm/mgag200/Kconfig
@@ -1,11 +1,7 @@
 config DRM_MGAG200
 	tristate "Kernel modesetting driver for MGA G200 server engines"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 This is a KMS driver for the MGA G200 server chips, it
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 615cbb08ba29..13798b3e6beb 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -17,8 +17,8 @@
 static void mga_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct mga_framebuffer *mga_fb = to_mga_framebuffer(fb);
-	if (mga_fb->obj)
-		drm_gem_object_unreference_unlocked(mga_fb->obj);
+
+	drm_gem_object_unreference_unlocked(mga_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 9d5083d0f1ee..68268e55d595 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -186,17 +186,6 @@ static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_r
 {
 }
 
-static int mgag200_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void mgag200_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.ttm_tt_unpopulate = mgag200_ttm_tt_unpopulate,
 	.init_mem_type = mgag200_bo_init_mem_type,
 	.evict_flags = mgag200_bo_evict_flags,
-	.move = mgag200_bo_move,
+	.move = NULL,
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 167a4971f47c..7c7a0314a756 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -10,6 +10,7 @@ config DRM_MSM
 	select SHMEM
 	select TMPFS
 	select QCOM_SCM
+	select SND_SOC_HDMI_CODEC if SND_SOC
 	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 60cb02624dc0..4e2806cf778c 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -35,6 +35,7 @@ msm-y := \
 	mdp/mdp5/mdp5_crtc.o \
 	mdp/mdp5/mdp5_encoder.o \
 	mdp/mdp5/mdp5_irq.o \
+	mdp/mdp5/mdp5_mdss.o \
 	mdp/mdp5/mdp5_kms.o \
 	mdp/mdp5/mdp5_plane.o \
 	mdp/mdp5/mdp5_smp.o \
@@ -45,6 +46,7 @@ msm-y := \
 	msm_fence.o \
 	msm_gem.o \
 	msm_gem_prime.o \
+	msm_gem_shrinker.o \
 	msm_gem_submit.o \
 	msm_gpu.o \
 	msm_iommu.o \
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index fbe304ee6c80..f386f463278d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -139,7 +139,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	struct msm_ringbuffer *ring = gpu->rb;
-	unsigned i, ibs = 0;
+	unsigned i;
 
 	for (i = 0; i < submit->nr_cmds; i++) {
 		switch (submit->cmd[i].type) {
@@ -155,18 +155,11 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
 			OUT_RING(ring, submit->cmd[i].iova);
 			OUT_RING(ring, submit->cmd[i].size);
-			ibs++;
+			OUT_PKT2(ring);
 			break;
 		}
 	}
 
-	/* on a320, at least, we seem to need to pad things out to an
-	 * even number of qwords to avoid issue w/ CP hanging on wrap-
-	 * around:
-	 */
-	if (ibs % 2)
-		OUT_PKT2(ring);
-
 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
 	OUT_RING(ring, submit->fence->seqno);
 
@@ -407,8 +400,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		return ret;
 	}
 
-	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
-	if (!adreno_gpu->memptrs) {
+	adreno_gpu->memptrs = msm_gem_get_vaddr(adreno_gpu->memptrs_bo);
+	if (IS_ERR(adreno_gpu->memptrs)) {
 		dev_err(drm->dev, "could not vmap memptrs\n");
 		return -ENOMEM;
 	}
@@ -426,8 +419,12 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 void adreno_gpu_cleanup(struct adreno_gpu *gpu)
 {
 	if (gpu->memptrs_bo) {
+		if (gpu->memptrs)
+			msm_gem_put_vaddr(gpu->memptrs_bo);
+
 		if (gpu->memptrs_iova)
 			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+
 		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
 	}
 	release_firmware(gpu->pm4);
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 6edcd6f57e70..ec572f8389ed 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -29,7 +29,7 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi)
 	struct platform_device *phy_pdev;
 	struct device_node *phy_node;
 
-	phy_node = of_parse_phandle(pdev->dev.of_node, "qcom,dsi-phy", 0);
+	phy_node = of_parse_phandle(pdev->dev.of_node, "phys", 0);
 	if (!phy_node) {
 		dev_err(&pdev->dev, "cannot find phy device\n");
 		return -ENXIO;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 93c1ee094eac..63436d8ee470 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -29,6 +29,8 @@ static const struct msm_dsi_config apq8064_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_v2_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_v2_bus_clk_names),
+	.io_start = { 0x4700000, 0x5800000 },
+	.num_dsi = 2,
 };
 
 static const char * const dsi_6g_bus_clk_names[] = {
@@ -48,6 +50,8 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_6g_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
+	.io_start = { 0xfd922800, 0xfd922b00 },
+	.num_dsi = 2,
 };
 
 static const char * const dsi_8916_bus_clk_names[] = {
@@ -66,6 +70,8 @@ static const struct msm_dsi_config msm8916_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_8916_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_8916_bus_clk_names),
+	.io_start = { 0x1a98000 },
+	.num_dsi = 1,
 };
 
 static const struct msm_dsi_config msm8994_dsi_cfg = {
@@ -84,6 +90,8 @@ static const struct msm_dsi_config msm8994_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_6g_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
+	.io_start = { 0xfd998000, 0xfd9a0000 },
+	.num_dsi = 2,
 };
 
 static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index a68c836744a3..eeacc3232494 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -34,6 +34,8 @@ struct msm_dsi_config {
 	struct dsi_reg_config reg_cfg;
 	const char * const *bus_clk_names;
 	const int num_bus_clks;
+	const resource_size_t io_start[DSI_MAX];
+	const int num_dsi;
 };
 
 struct msm_dsi_cfg_handler {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index a3e47ad83eb3..f05ed0e1f3d6 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -1066,7 +1066,7 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
 	}
 
 	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
-		data = msm_gem_vaddr(msm_host->tx_gem_obj);
+		data = msm_gem_get_vaddr(msm_host->tx_gem_obj);
 		if (IS_ERR(data)) {
 			ret = PTR_ERR(data);
 			pr_err("%s: get vaddr failed, %d\n", __func__, ret);
@@ -1094,6 +1094,9 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
 	if (packet.size < len)
 		memset(data + packet.size, 0xff, len - packet.size);
 
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G)
+		msm_gem_put_vaddr(msm_host->tx_gem_obj);
+
 	return len;
 }
 
@@ -1543,7 +1546,7 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 	u32 lane_map[4];
 	int ret, i, len, num_lanes;
 
-	prop = of_find_property(ep, "qcom,data-lane-map", &len);
+	prop = of_find_property(ep, "data-lanes", &len);
 	if (!prop) {
 		dev_dbg(dev, "failed to find data lane mapping\n");
 		return -EINVAL;
@@ -1558,7 +1561,7 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 
 	msm_host->num_data_lanes = num_lanes;
 
-	ret = of_property_read_u32_array(ep, "qcom,data-lane-map", lane_map,
+	ret = of_property_read_u32_array(ep, "data-lanes", lane_map,
 					 num_lanes);
 	if (ret) {
 		dev_err(dev, "failed to read lane data\n");
@@ -1573,8 +1576,19 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 		const int *swap = supported_data_lane_swaps[i];
 		int j;
 
+		/*
+		 * the data-lanes array we get from DT has a logical->physical
+		 * mapping. The "data lane swap" register field represents
+		 * supported configurations in a physical->logical mapping.
+		 * Translate the DT mapping to what we understand and find a
+		 * configuration that works.
+		 */
 		for (j = 0; j < num_lanes; j++) {
-			if (swap[j] != lane_map[j])
+			if (lane_map[j] < 0 || lane_map[j] > 3)
+				dev_err(dev, "bad physical lane entry %u\n",
+					lane_map[j]);
+
+			if (swap[lane_map[j]] != j)
 				break;
 		}
 
@@ -1594,20 +1608,13 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 	struct device_node *endpoint, *device_node;
 	int ret;
 
-	ret = of_property_read_u32(np, "qcom,dsi-host-index", &msm_host->id);
-	if (ret) {
-		dev_err(dev, "%s: host index not specified, ret=%d\n",
-			__func__, ret);
-		return ret;
-	}
-
 	/*
-	 * Get the first endpoint node. In our case, dsi has one output port
-	 * to which the panel is connected. Don't return an error if a port
-	 * isn't defined. It's possible that there is nothing connected to
-	 * the dsi output.
+	 * Get the endpoint of the output port of the DSI host. In our case,
+	 * this is mapped to port number with reg = 1. Don't return an error if
+	 * the remote endpoint isn't defined. It's possible that there is
+	 * nothing connected to the dsi output.
 	 */
-	endpoint = of_graph_get_next_endpoint(np, NULL);
+	endpoint = of_graph_get_endpoint_by_regs(np, 1, -1);
 	if (!endpoint) {
 		dev_dbg(dev, "%s: no endpoint\n", __func__);
 		return 0;
@@ -1648,6 +1655,25 @@ err:
 	return ret;
 }
 
+static int dsi_host_get_id(struct msm_dsi_host *msm_host)
+{
+	struct platform_device *pdev = msm_host->pdev;
+	const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg;
+	struct resource *res;
+	int i;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dsi_ctrl");
+	if (!res)
+		return -EINVAL;
+
+	for (i = 0; i < cfg->num_dsi; i++) {
+		if (cfg->io_start[i] == res->start)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
 int msm_dsi_host_init(struct msm_dsi *msm_dsi)
 {
 	struct msm_dsi_host *msm_host = NULL;
@@ -1684,6 +1710,13 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
 		goto fail;
 	}
 
+	msm_host->id = dsi_host_get_id(msm_host);
+	if (msm_host->id < 0) {
+		ret = msm_host->id;
+		pr_err("%s: unable to identify DSI host index\n", __func__);
+		goto fail;
+	}
+
 	/* fixup base address by io offset */
 	msm_host->ctrl_base += msm_host->cfg_hnd->cfg->io_offset;
 
@@ -2245,9 +2278,9 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 	}
 
 	msm_host->mode = drm_mode_duplicate(msm_host->dev, mode);
-	if (IS_ERR(msm_host->mode)) {
+	if (!msm_host->mode) {
 		pr_err("%s: cannot duplicate mode\n", __func__);
-		return PTR_ERR(msm_host->mode);
+		return -ENOMEM;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index e2f42d8ea294..f39386ed75e4 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -271,6 +271,30 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 	{}
 };
 
+/*
+ * Currently, we only support one SoC for each PHY type. When we have multiple
+ * SoCs for the same PHY, we can try to make the index searching a bit more
+ * clever.
+ */
+static int dsi_phy_get_id(struct msm_dsi_phy *phy)
+{
+	struct platform_device *pdev = phy->pdev;
+	const struct msm_dsi_phy_cfg *cfg = phy->cfg;
+	struct resource *res;
+	int i;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dsi_phy");
+	if (!res)
+		return -EINVAL;
+
+	for (i = 0; i < cfg->num_dsi_phy; i++) {
+		if (cfg->io_start[i] == res->start)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
 static int dsi_phy_driver_probe(struct platform_device *pdev)
 {
 	struct msm_dsi_phy *phy;
@@ -289,10 +313,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 	phy->cfg = match->data;
 	phy->pdev = pdev;
 
-	ret = of_property_read_u32(dev->of_node,
-				"qcom,dsi-phy-index", &phy->id);
-	if (ret) {
-		dev_err(dev, "%s: PHY index not specified, %d\n",
+	phy->id = dsi_phy_get_id(phy);
+	if (phy->id < 0) {
+		ret = phy->id;
+		dev_err(dev, "%s: couldn't identify PHY index, %d\n",
 			__func__, ret);
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 0d54ed00386d..f24a85439b94 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -38,6 +38,8 @@ struct msm_dsi_phy_cfg {
 	 * Fill default H/W values in illegal cells, eg. cell {0, 1}.
 	 */
 	bool src_pll_truthtable[DSI_MAX][DSI_MAX];
+	const resource_size_t io_start[DSI_MAX];
+	const int num_dsi_phy;
 };
 
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
index f4bc11af849a..c757e2070cac 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
@@ -145,6 +145,8 @@ const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs = {
 	.ops = {
 		.enable = dsi_20nm_phy_enable,
 		.disable = dsi_20nm_phy_disable,
-	}
+	},
+	.io_start = { 0xfd998300, 0xfd9a0300 },
+	.num_dsi_phy = 2,
 };
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
index 96d1852af418..63d7fba31380 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
@@ -145,6 +145,8 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0xfd922b00, 0xfd923100 },
+	.num_dsi_phy = 2,
 };
 
 const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
@@ -160,5 +162,7 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0x1a98500 },
+	.num_dsi_phy = 1,
 };
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index 213355a3e767..7bdb9de54968 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -192,4 +192,6 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0x4700300, 0x5800300 },
+	.num_dsi_phy = 2,
 };
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 51b9ea552f97..973720792236 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -19,6 +19,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 
+#include <sound/hdmi-codec.h>
 #include "hdmi.h"
 
 void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on)
@@ -434,6 +435,111 @@ static int msm_hdmi_get_gpio(struct device_node *of_node, const char *name)
 	return gpio;
 }
 
+/*
+ * HDMI audio codec callbacks
+ */
+static int msm_hdmi_audio_hw_params(struct device *dev, void *data,
+				    struct hdmi_codec_daifmt *daifmt,
+				    struct hdmi_codec_params *params)
+{
+	struct hdmi *hdmi = dev_get_drvdata(dev);
+	unsigned int chan;
+	unsigned int channel_allocation = 0;
+	unsigned int rate;
+	unsigned int level_shift  = 0; /* 0dB */
+	bool down_mix = false;
+
+	dev_dbg(dev, "%u Hz, %d bit, %d channels\n", params->sample_rate,
+		 params->sample_width, params->cea.channels);
+
+	switch (params->cea.channels) {
+	case 2:
+		/* FR and FL speakers */
+		channel_allocation  = 0;
+		chan = MSM_HDMI_AUDIO_CHANNEL_2;
+		break;
+	case 4:
+		/* FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x3;
+		chan = MSM_HDMI_AUDIO_CHANNEL_4;
+		break;
+	case 6:
+		/* RR, RL, FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x0B;
+		chan = MSM_HDMI_AUDIO_CHANNEL_6;
+		break;
+	case 8:
+		/* FRC, FLC, RR, RL, FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x1F;
+		chan = MSM_HDMI_AUDIO_CHANNEL_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (params->sample_rate) {
+	case 32000:
+		rate = HDMI_SAMPLE_RATE_32KHZ;
+		break;
+	case 44100:
+		rate = HDMI_SAMPLE_RATE_44_1KHZ;
+		break;
+	case 48000:
+		rate = HDMI_SAMPLE_RATE_48KHZ;
+		break;
+	case 88200:
+		rate = HDMI_SAMPLE_RATE_88_2KHZ;
+		break;
+	case 96000:
+		rate = HDMI_SAMPLE_RATE_96KHZ;
+		break;
+	case 176400:
+		rate = HDMI_SAMPLE_RATE_176_4KHZ;
+		break;
+	case 192000:
+		rate = HDMI_SAMPLE_RATE_192KHZ;
+		break;
+	default:
+		dev_err(dev, "rate[%d] not supported!\n",
+			params->sample_rate);
+		return -EINVAL;
+	}
+
+	msm_hdmi_audio_set_sample_rate(hdmi, rate);
+	msm_hdmi_audio_info_setup(hdmi, 1, chan, channel_allocation,
+			      level_shift, down_mix);
+
+	return 0;
+}
+
+static void msm_hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct hdmi *hdmi = dev_get_drvdata(dev);
+
+	msm_hdmi_audio_info_setup(hdmi, 0, 0, 0, 0, 0);
+}
+
+static const struct hdmi_codec_ops msm_hdmi_audio_codec_ops = {
+	.hw_params = msm_hdmi_audio_hw_params,
+	.audio_shutdown = msm_hdmi_audio_shutdown,
+};
+
+static struct hdmi_codec_pdata codec_data = {
+	.ops = &msm_hdmi_audio_codec_ops,
+	.max_i2s_channels = 8,
+	.i2s = 1,
+};
+
+static int msm_hdmi_register_audio_driver(struct hdmi *hdmi, struct device *dev)
+{
+	hdmi->audio_pdev = platform_device_register_data(dev,
+							 HDMI_CODEC_DRV_NAME,
+							 PLATFORM_DEVID_AUTO,
+							 &codec_data,
+							 sizeof(codec_data));
+	return PTR_ERR_OR_ZERO(hdmi->audio_pdev);
+}
+
 static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
@@ -441,7 +547,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 	static struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
 	struct device_node *of_node = dev->of_node;
-	int i;
+	int i, err;
 
 	hdmi_cfg = (struct hdmi_platform_config *)
 			of_device_get_match_data(dev);
@@ -468,6 +574,12 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(hdmi);
 	priv->hdmi = hdmi;
 
+	err = msm_hdmi_register_audio_driver(hdmi, dev);
+	if (err) {
+		DRM_ERROR("Failed to attach an audio codec %d\n", err);
+		hdmi->audio_pdev = NULL;
+	}
+
 	return 0;
 }
 
@@ -477,6 +589,9 @@ static void msm_hdmi_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
 	if (priv->hdmi) {
+		if (priv->hdmi->audio_pdev)
+			platform_device_unregister(priv->hdmi->audio_pdev);
+
 		msm_hdmi_destroy(priv->hdmi);
 		priv->hdmi = NULL;
 	}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index bc7ba0bdee07..accc9a61611d 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -50,6 +50,7 @@ struct hdmi_hdcp_ctrl;
 struct hdmi {
 	struct drm_device *dev;
 	struct platform_device *pdev;
+	struct platform_device *audio_pdev;
 
 	const struct hdmi_platform_config *config;
 
@@ -210,6 +211,19 @@ static inline int msm_hdmi_pll_8996_init(struct platform_device *pdev)
 /*
  * audio:
  */
+/* Supported HDMI Audio channels and rates */
+#define	MSM_HDMI_AUDIO_CHANNEL_2	0
+#define	MSM_HDMI_AUDIO_CHANNEL_4	1
+#define	MSM_HDMI_AUDIO_CHANNEL_6	2
+#define	MSM_HDMI_AUDIO_CHANNEL_8	3
+
+#define	HDMI_SAMPLE_RATE_32KHZ		0
+#define	HDMI_SAMPLE_RATE_44_1KHZ	1
+#define	HDMI_SAMPLE_RATE_48KHZ		2
+#define	HDMI_SAMPLE_RATE_88_2KHZ	3
+#define	HDMI_SAMPLE_RATE_96KHZ		4
+#define	HDMI_SAMPLE_RATE_176_4KHZ	5
+#define	HDMI_SAMPLE_RATE_192KHZ		6
 
 int msm_hdmi_audio_update(struct hdmi *hdmi);
 int msm_hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
index 0baaaaabd002..6e767979aab3 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
@@ -1430,7 +1430,7 @@ struct hdmi_hdcp_ctrl *msm_hdmi_hdcp_init(struct hdmi *hdmi)
 
 void msm_hdmi_hdcp_destroy(struct hdmi *hdmi)
 {
-	if (hdmi && hdmi->hdcp_ctrl) {
+	if (hdmi) {
 		kfree(hdmi->hdcp_ctrl);
 		hdmi->hdcp_ctrl = NULL;
 	}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
index 35ad78a1dc1c..24258e3025e3 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
@@ -23,7 +23,6 @@
 
 struct mdp4_dtv_encoder {
 	struct drm_encoder base;
-	struct clk *src_clk;
 	struct clk *hdmi_clk;
 	struct clk *mdp_clk;
 	unsigned long int pixclock;
@@ -179,7 +178,6 @@ static void mdp4_dtv_encoder_disable(struct drm_encoder *encoder)
 	 */
 	mdp_irq_wait(&mdp4_kms->base, MDP4_IRQ_EXTERNAL_VSYNC);
 
-	clk_disable_unprepare(mdp4_dtv_encoder->src_clk);
 	clk_disable_unprepare(mdp4_dtv_encoder->hdmi_clk);
 	clk_disable_unprepare(mdp4_dtv_encoder->mdp_clk);
 
@@ -208,19 +206,21 @@ static void mdp4_dtv_encoder_enable(struct drm_encoder *encoder)
 
 	bs_set(mdp4_dtv_encoder, 1);
 
-	DBG("setting src_clk=%lu", pc);
+	DBG("setting mdp_clk=%lu", pc);
 
-	ret = clk_set_rate(mdp4_dtv_encoder->src_clk, pc);
+	ret = clk_set_rate(mdp4_dtv_encoder->mdp_clk, pc);
 	if (ret)
-		dev_err(dev->dev, "failed to set src_clk to %lu: %d\n", pc, ret);
-	clk_prepare_enable(mdp4_dtv_encoder->src_clk);
-	ret = clk_prepare_enable(mdp4_dtv_encoder->hdmi_clk);
-	if (ret)
-		dev_err(dev->dev, "failed to enable hdmi_clk: %d\n", ret);
+		dev_err(dev->dev, "failed to set mdp_clk to %lu: %d\n",
+			pc, ret);
+
 	ret = clk_prepare_enable(mdp4_dtv_encoder->mdp_clk);
 	if (ret)
 		dev_err(dev->dev, "failed to enabled mdp_clk: %d\n", ret);
 
+	ret = clk_prepare_enable(mdp4_dtv_encoder->hdmi_clk);
+	if (ret)
+		dev_err(dev->dev, "failed to enable hdmi_clk: %d\n", ret);
+
 	mdp4_write(mdp4_kms, REG_MDP4_DTV_ENABLE, 1);
 
 	mdp4_dtv_encoder->enabled = true;
@@ -235,7 +235,7 @@ static const struct drm_encoder_helper_funcs mdp4_dtv_encoder_helper_funcs = {
 long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 {
 	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
-	return clk_round_rate(mdp4_dtv_encoder->src_clk, rate);
+	return clk_round_rate(mdp4_dtv_encoder->mdp_clk, rate);
 }
 
 /* initialize encoder */
@@ -257,13 +257,6 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
 
-	mdp4_dtv_encoder->src_clk = devm_clk_get(dev->dev, "src_clk");
-	if (IS_ERR(mdp4_dtv_encoder->src_clk)) {
-		dev_err(dev->dev, "failed to get src_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->src_clk);
-		goto fail;
-	}
-
 	mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk");
 	if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) {
 		dev_err(dev->dev, "failed to get hdmi_clk\n");
@@ -271,9 +264,9 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "mdp_clk");
+	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk");
 	if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) {
-		dev_err(dev->dev, "failed to get mdp_clk\n");
+		dev_err(dev->dev, "failed to get tv_clk\n");
 		ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk);
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index f145d256e332..7b39e89fbc2b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -158,6 +158,7 @@ static const char * const iommu_ports[] = {
 static void mdp4_destroy(struct msm_kms *kms)
 {
 	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	struct device *dev = mdp4_kms->dev->dev;
 	struct msm_mmu *mmu = mdp4_kms->mmu;
 
 	if (mmu) {
@@ -167,8 +168,11 @@ static void mdp4_destroy(struct msm_kms *kms)
 
 	if (mdp4_kms->blank_cursor_iova)
 		msm_gem_put_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id);
-	if (mdp4_kms->blank_cursor_bo)
-		drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo);
+	drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo);
+
+	if (mdp4_kms->rpm_enabled)
+		pm_runtime_disable(dev);
+
 	kfree(mdp4_kms);
 }
 
@@ -436,7 +440,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	struct mdp4_kms *mdp4_kms;
 	struct msm_kms *kms = NULL;
 	struct msm_mmu *mmu;
-	int ret;
+	int irq, ret;
 
 	mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL);
 	if (!mdp4_kms) {
@@ -457,6 +461,15 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		dev_err(dev->dev, "failed to get irq: %d\n", ret);
+		goto fail;
+	}
+
+	kms->irq = irq;
+
 	/* NOTE: driver for this regulator still missing upstream.. use
 	 * _get_exclusive() and ignore the error if it does not exist
 	 * (and hope that the bootloader left it on for us)
@@ -492,7 +505,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	mdp4_kms->axi_clk = devm_clk_get(&pdev->dev, "mdp_axi_clk");
+	mdp4_kms->axi_clk = devm_clk_get(&pdev->dev, "bus_clk");
 	if (IS_ERR(mdp4_kms->axi_clk)) {
 		dev_err(dev->dev, "failed to get axi_clk\n");
 		ret = PTR_ERR(mdp4_kms->axi_clk);
@@ -502,6 +515,9 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	clk_set_rate(mdp4_kms->clk, config->max_clk);
 	clk_set_rate(mdp4_kms->lut_clk, config->max_clk);
 
+	pm_runtime_enable(dev->dev);
+	mdp4_kms->rpm_enabled = true;
+
 	/* make sure things are off before attaching iommu (bootloader could
 	 * have left things on, in which case we'll start getting faults if
 	 * we don't disable):
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index c5d045d5680d..25fb83997119 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -47,6 +47,8 @@ struct mdp4_kms {
 
 	struct mdp_irq error_handler;
 
+	bool rpm_enabled;
+
 	/* empty/blank cursor bo to use when cursor is "disabled" */
 	struct drm_gem_object *blank_cursor_bo;
 	uint32_t blank_cursor_iova;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
index b275ce11b24b..ca6ca30650a0 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
@@ -8,19 +8,11 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  27887 bytes, from 2015-10-22 16:34:52)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
-- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
-
-Copyright (C) 2013-2015 by the following authors:
+- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/mdp/mdp5.xml   (  36965 bytes, from 2016-05-10 05:06:30)
+- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-05-09 06:32:54)
+- /local/mnt/workspace/source_trees/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2016-01-07 08:45:55)
+
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
@@ -198,118 +190,109 @@ static inline uint32_t MDSS_HW_VERSION_MAJOR(uint32_t val)
 #define MDSS_HW_INTR_STATUS_INTR_HDMI				0x00000100
 #define MDSS_HW_INTR_STATUS_INTR_EDP				0x00001000
 
-static inline uint32_t __offset_MDP(uint32_t idx)
-{
-	switch (idx) {
-		case 0: return (mdp5_cfg->mdp.base[0]);
-		default: return INVALID_IDX(idx);
-	}
-}
-static inline uint32_t REG_MDP5_MDP(uint32_t i0) { return 0x00000000 + __offset_MDP(i0); }
-
-static inline uint32_t REG_MDP5_MDP_HW_VERSION(uint32_t i0) { return 0x00000000 + __offset_MDP(i0); }
-#define MDP5_MDP_HW_VERSION_STEP__MASK				0x0000ffff
-#define MDP5_MDP_HW_VERSION_STEP__SHIFT				0
-static inline uint32_t MDP5_MDP_HW_VERSION_STEP(uint32_t val)
+#define REG_MDP5_HW_VERSION					0x00000000
+#define MDP5_HW_VERSION_STEP__MASK				0x0000ffff
+#define MDP5_HW_VERSION_STEP__SHIFT				0
+static inline uint32_t MDP5_HW_VERSION_STEP(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_STEP__SHIFT) & MDP5_MDP_HW_VERSION_STEP__MASK;
+	return ((val) << MDP5_HW_VERSION_STEP__SHIFT) & MDP5_HW_VERSION_STEP__MASK;
 }
-#define MDP5_MDP_HW_VERSION_MINOR__MASK				0x0fff0000
-#define MDP5_MDP_HW_VERSION_MINOR__SHIFT			16
-static inline uint32_t MDP5_MDP_HW_VERSION_MINOR(uint32_t val)
+#define MDP5_HW_VERSION_MINOR__MASK				0x0fff0000
+#define MDP5_HW_VERSION_MINOR__SHIFT				16
+static inline uint32_t MDP5_HW_VERSION_MINOR(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_MINOR__SHIFT) & MDP5_MDP_HW_VERSION_MINOR__MASK;
+	return ((val) << MDP5_HW_VERSION_MINOR__SHIFT) & MDP5_HW_VERSION_MINOR__MASK;
 }
-#define MDP5_MDP_HW_VERSION_MAJOR__MASK				0xf0000000
-#define MDP5_MDP_HW_VERSION_MAJOR__SHIFT			28
-static inline uint32_t MDP5_MDP_HW_VERSION_MAJOR(uint32_t val)
+#define MDP5_HW_VERSION_MAJOR__MASK				0xf0000000
+#define MDP5_HW_VERSION_MAJOR__SHIFT				28
+static inline uint32_t MDP5_HW_VERSION_MAJOR(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_MAJOR__SHIFT) & MDP5_MDP_HW_VERSION_MAJOR__MASK;
+	return ((val) << MDP5_HW_VERSION_MAJOR__SHIFT) & MDP5_HW_VERSION_MAJOR__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_DISP_INTF_SEL(uint32_t i0) { return 0x00000004 + __offset_MDP(i0); }
-#define MDP5_MDP_DISP_INTF_SEL_INTF0__MASK			0x000000ff
-#define MDP5_MDP_DISP_INTF_SEL_INTF0__SHIFT			0
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF0(enum mdp5_intf_type val)
+#define REG_MDP5_DISP_INTF_SEL					0x00000004
+#define MDP5_DISP_INTF_SEL_INTF0__MASK				0x000000ff
+#define MDP5_DISP_INTF_SEL_INTF0__SHIFT				0
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF0(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF0__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF0__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF0__SHIFT) & MDP5_DISP_INTF_SEL_INTF0__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF1__MASK			0x0000ff00
-#define MDP5_MDP_DISP_INTF_SEL_INTF1__SHIFT			8
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF1(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF1__MASK				0x0000ff00
+#define MDP5_DISP_INTF_SEL_INTF1__SHIFT				8
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF1(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF1__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF1__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF1__SHIFT) & MDP5_DISP_INTF_SEL_INTF1__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF2__MASK			0x00ff0000
-#define MDP5_MDP_DISP_INTF_SEL_INTF2__SHIFT			16
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF2(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF2__MASK				0x00ff0000
+#define MDP5_DISP_INTF_SEL_INTF2__SHIFT				16
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF2(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF2__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF2__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF2__SHIFT) & MDP5_DISP_INTF_SEL_INTF2__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF3__MASK			0xff000000
-#define MDP5_MDP_DISP_INTF_SEL_INTF3__SHIFT			24
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF3(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF3__MASK				0xff000000
+#define MDP5_DISP_INTF_SEL_INTF3__SHIFT				24
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF3(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF3__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF3__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF3__SHIFT) & MDP5_DISP_INTF_SEL_INTF3__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_INTR_EN(uint32_t i0) { return 0x00000010 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_EN					0x00000010
 
-static inline uint32_t REG_MDP5_MDP_INTR_STATUS(uint32_t i0) { return 0x00000014 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_STATUS					0x00000014
 
-static inline uint32_t REG_MDP5_MDP_INTR_CLEAR(uint32_t i0) { return 0x00000018 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_CLEAR					0x00000018
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_EN(uint32_t i0) { return 0x0000001c + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_EN					0x0000001c
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_STATUS(uint32_t i0) { return 0x00000020 + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_STATUS				0x00000020
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_CLEAR(uint32_t i0) { return 0x00000024 + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_CLEAR				0x00000024
 
-static inline uint32_t REG_MDP5_MDP_SPARE_0(uint32_t i0) { return 0x00000028 + __offset_MDP(i0); }
-#define MDP5_MDP_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN		0x00000001
+#define REG_MDP5_SPARE_0					0x00000028
+#define MDP5_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN			0x00000001
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_W(uint32_t i0, uint32_t i1) { return 0x00000080 + __offset_MDP(i0) + 0x4*i1; }
+static inline uint32_t REG_MDP5_SMP_ALLOC_W(uint32_t i0) { return 0x00000080 + 0x4*i0; }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_W_REG(uint32_t i0, uint32_t i1) { return 0x00000080 + __offset_MDP(i0) + 0x4*i1; }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK			0x000000ff
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__SHIFT			0
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0(uint32_t val)
+static inline uint32_t REG_MDP5_SMP_ALLOC_W_REG(uint32_t i0) { return 0x00000080 + 0x4*i0; }
+#define MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK			0x000000ff
+#define MDP5_SMP_ALLOC_W_REG_CLIENT0__SHIFT			0
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT0(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT0__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK			0x0000ff00
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__SHIFT			8
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1(uint32_t val)
+#define MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK			0x0000ff00
+#define MDP5_SMP_ALLOC_W_REG_CLIENT1__SHIFT			8
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT1(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT1__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK			0x00ff0000
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__SHIFT			16
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2(uint32_t val)
+#define MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK			0x00ff0000
+#define MDP5_SMP_ALLOC_W_REG_CLIENT2__SHIFT			16
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT2(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT2__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_R(uint32_t i0, uint32_t i1) { return 0x00000130 + __offset_MDP(i0) + 0x4*i1; }
+static inline uint32_t REG_MDP5_SMP_ALLOC_R(uint32_t i0) { return 0x00000130 + 0x4*i0; }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_R_REG(uint32_t i0, uint32_t i1) { return 0x00000130 + __offset_MDP(i0) + 0x4*i1; }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__MASK			0x000000ff
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__SHIFT			0
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0(uint32_t val)
+static inline uint32_t REG_MDP5_SMP_ALLOC_R_REG(uint32_t i0) { return 0x00000130 + 0x4*i0; }
+#define MDP5_SMP_ALLOC_R_REG_CLIENT0__MASK			0x000000ff
+#define MDP5_SMP_ALLOC_R_REG_CLIENT0__SHIFT			0
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT0(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT0__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT0__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__MASK			0x0000ff00
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__SHIFT			8
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1(uint32_t val)
+#define MDP5_SMP_ALLOC_R_REG_CLIENT1__MASK			0x0000ff00
+#define MDP5_SMP_ALLOC_R_REG_CLIENT1__SHIFT			8
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT1(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT1__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT1__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__MASK			0x00ff0000
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__SHIFT			16
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2(uint32_t val)
+#define MDP5_SMP_ALLOC_R_REG_CLIENT2__MASK			0x00ff0000
+#define MDP5_SMP_ALLOC_R_REG_CLIENT2__SHIFT			16
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT2(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT2__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT2__MASK;
 }
 
 static inline uint32_t __offset_IGC(enum mdp5_igc_type idx)
@@ -322,35 +305,35 @@ static inline uint32_t __offset_IGC(enum mdp5_igc_type idx)
 		default: return INVALID_IDX(idx);
 	}
 }
-static inline uint32_t REG_MDP5_MDP_IGC(uint32_t i0, enum mdp5_igc_type i1) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1); }
+static inline uint32_t REG_MDP5_IGC(enum mdp5_igc_type i0) { return 0x00000000 + __offset_IGC(i0); }
 
-static inline uint32_t REG_MDP5_MDP_IGC_LUT(uint32_t i0, enum mdp5_igc_type i1, uint32_t i2) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1) + 0x4*i2; }
+static inline uint32_t REG_MDP5_IGC_LUT(enum mdp5_igc_type i0, uint32_t i1) { return 0x00000000 + __offset_IGC(i0) + 0x4*i1; }
 
-static inline uint32_t REG_MDP5_MDP_IGC_LUT_REG(uint32_t i0, enum mdp5_igc_type i1, uint32_t i2) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1) + 0x4*i2; }
-#define MDP5_MDP_IGC_LUT_REG_VAL__MASK				0x00000fff
-#define MDP5_MDP_IGC_LUT_REG_VAL__SHIFT				0
-static inline uint32_t MDP5_MDP_IGC_LUT_REG_VAL(uint32_t val)
+static inline uint32_t REG_MDP5_IGC_LUT_REG(enum mdp5_igc_type i0, uint32_t i1) { return 0x00000000 + __offset_IGC(i0) + 0x4*i1; }
+#define MDP5_IGC_LUT_REG_VAL__MASK				0x00000fff
+#define MDP5_IGC_LUT_REG_VAL__SHIFT				0
+static inline uint32_t MDP5_IGC_LUT_REG_VAL(uint32_t val)
 {
-	return ((val) << MDP5_MDP_IGC_LUT_REG_VAL__SHIFT) & MDP5_MDP_IGC_LUT_REG_VAL__MASK;
+	return ((val) << MDP5_IGC_LUT_REG_VAL__SHIFT) & MDP5_IGC_LUT_REG_VAL__MASK;
 }
-#define MDP5_MDP_IGC_LUT_REG_INDEX_UPDATE			0x02000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_0			0x10000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_1			0x20000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_2			0x40000000
+#define MDP5_IGC_LUT_REG_INDEX_UPDATE				0x02000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_0				0x10000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_1				0x20000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_2				0x40000000
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_EN(uint32_t i0) { return 0x000002f4 + __offset_MDP(i0); }
+#define REG_MDP5_SPLIT_DPL_EN					0x000002f4
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_UPPER(uint32_t i0) { return 0x000002f8 + __offset_MDP(i0); }
-#define MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL			0x00000002
-#define MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL_FREE_RUN		0x00000004
-#define MDP5_MDP_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX		0x00000010
-#define MDP5_MDP_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX		0x00000100
+#define REG_MDP5_SPLIT_DPL_UPPER				0x000002f8
+#define MDP5_SPLIT_DPL_UPPER_SMART_PANEL			0x00000002
+#define MDP5_SPLIT_DPL_UPPER_SMART_PANEL_FREE_RUN		0x00000004
+#define MDP5_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX			0x00000010
+#define MDP5_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX			0x00000100
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_LOWER(uint32_t i0) { return 0x000003f0 + __offset_MDP(i0); }
-#define MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL			0x00000002
-#define MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL_FREE_RUN		0x00000004
-#define MDP5_MDP_SPLIT_DPL_LOWER_INTF1_TG_SYNC			0x00000010
-#define MDP5_MDP_SPLIT_DPL_LOWER_INTF2_TG_SYNC			0x00000100
+#define REG_MDP5_SPLIT_DPL_LOWER				0x000003f0
+#define MDP5_SPLIT_DPL_LOWER_SMART_PANEL			0x00000002
+#define MDP5_SPLIT_DPL_LOWER_SMART_PANEL_FREE_RUN		0x00000004
+#define MDP5_SPLIT_DPL_LOWER_INTF1_TG_SYNC			0x00000010
+#define MDP5_SPLIT_DPL_LOWER_INTF2_TG_SYNC			0x00000100
 
 static inline uint32_t __offset_CTL(uint32_t idx)
 {
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
index 57f73f0c120d..ac9e4cde1380 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
@@ -26,7 +26,6 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	.name = "msm8x74v1",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -41,12 +40,12 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x0003ffff,
 	},
 	.pipe_vig = {
 		.count = 3,
-		.base = { 0x01200, 0x01600, 0x01a00 },
+		.base = { 0x01100, 0x01500, 0x01900 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			MDP_PIPE_CAP_SCALE |
@@ -55,7 +54,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.pipe_rgb = {
 		.count = 3,
-		.base = { 0x01e00, 0x02200, 0x02600 },
+		.base = { 0x01d00, 0x02100, 0x02500 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			MDP_PIPE_CAP_SCALE |
@@ -63,26 +62,26 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x02a00, 0x02e00 },
+		.base = { 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			0,
 	},
 	.lm = {
 		.count = 5,
-		.base = { 0x03200, 0x03600, 0x03a00, 0x03e00, 0x04200 },
+		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
 		.nb_stages = 5,
 	},
 	.dspp = {
 		.count = 3,
-		.base = { 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x04500, 0x04900, 0x04d00 },
 	},
 	.pp = {
 		.count = 3,
-		.base = { 0x21b00, 0x21c00, 0x21d00 },
+		.base = { 0x21a00, 0x21b00, 0x21c00 },
 	},
 	.intf = {
-		.base = { 0x21100, 0x21300, 0x21500, 0x21700 },
+		.base = { 0x21000, 0x21200, 0x21400, 0x21600 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -97,7 +96,6 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
 	.name = "msm8x74",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -112,48 +110,48 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x0003ffff,
 	},
 	.pipe_vig = {
 		.count = 3,
-		.base = { 0x01200, 0x01600, 0x01a00 },
+		.base = { 0x01100, 0x01500, 0x01900 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 3,
-		.base = { 0x01e00, 0x02200, 0x02600 },
+		.base = { 0x01d00, 0x02100, 0x02500 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x02a00, 0x02e00 },
+		.base = { 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 5,
-		.base = { 0x03200, 0x03600, 0x03a00, 0x03e00, 0x04200 },
+		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 3,
-		.base = { 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x04500, 0x04900, 0x04d00 },
 	},
 	.ad = {
 		.count = 2,
-		.base = { 0x13100, 0x13300 },
+		.base = { 0x13000, 0x13200 },
 	},
 	.pp = {
 		.count = 3,
-		.base = { 0x12d00, 0x12e00, 0x12f00 },
+		.base = { 0x12c00, 0x12d00, 0x12e00 },
 	},
 	.intf = {
-		.base = { 0x12500, 0x12700, 0x12900, 0x12b00 },
+		.base = { 0x12400, 0x12600, 0x12800, 0x12a00 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -168,7 +166,6 @@ const struct mdp5_cfg_hw apq8084_config = {
 	.name = "apq8084",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -190,49 +187,49 @@ const struct mdp5_cfg_hw apq8084_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x003fffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x01200, 0x01600, 0x01a00, 0x01e00 },
+		.base = { 0x01100, 0x01500, 0x01900, 0x01d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x02200, 0x02600, 0x02a00, 0x02e00 },
+		.base = { 0x02100, 0x02500, 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x03200, 0x03600 },
+		.base = { 0x03100, 0x03500 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x03a00, 0x03e00, 0x04200, 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x03900, 0x03d00, 0x04100, 0x04500, 0x04900, 0x04d00 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 4,
-		.base = { 0x05200, 0x05600, 0x05a00, 0x05e00 },
+		.base = { 0x05100, 0x05500, 0x05900, 0x05d00 },
 
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x13500, 0x13700, 0x13900 },
+		.base = { 0x13400, 0x13600, 0x13800 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x12f00, 0x13000, 0x13100, 0x13200 },
+		.base = { 0x12e00, 0x12f00, 0x13000, 0x13100 },
 	},
 	.intf = {
-		.base = { 0x12500, 0x12700, 0x12900, 0x12b00, 0x12d00 },
+		.base = { 0x12400, 0x12600, 0x12800, 0x12a00, 0x12c00 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -247,7 +244,7 @@ const struct mdp5_cfg_hw msm8x16_config = {
 	.name = "msm8x16",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
+		.base = { 0x0 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -261,41 +258,41 @@ const struct mdp5_cfg_hw msm8x16_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0x4003ffff,
 	},
 	.pipe_vig = {
 		.count = 1,
-		.base = { 0x05000 },
+		.base = { 0x04000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 2,
-		.base = { 0x15000, 0x17000 },
+		.base = { 0x14000, 0x16000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 1,
-		.base = { 0x25000 },
+		.base = { 0x24000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 2, /* LM0 and LM3 */
-		.base = { 0x45000, 0x48000 },
+		.base = { 0x44000, 0x47000 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 1,
-		.base = { 0x55000 },
+		.base = { 0x54000 },
 
 	},
 	.intf = {
-		.base = { 0x00000, 0x6b800 },
+		.base = { 0x00000, 0x6a800 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
@@ -308,7 +305,6 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	.name = "msm8x94",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -330,49 +326,49 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0xf0ffffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x05000, 0x07000, 0x09000, 0x0b000 },
+		.base = { 0x04000, 0x06000, 0x08000, 0x0a000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x15000, 0x17000, 0x19000, 0x1b000 },
+		.base = { 0x14000, 0x16000, 0x18000, 0x1a000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x25000, 0x27000 },
+		.base = { 0x24000, 0x26000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x45000, 0x46000, 0x47000, 0x48000, 0x49000, 0x4a000 },
+		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
 		.nb_stages = 8,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 4,
-		.base = { 0x55000, 0x57000, 0x59000, 0x5b000 },
+		.base = { 0x54000, 0x56000, 0x58000, 0x5a000 },
 
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x79000, 0x79800, 0x7a000 },
+		.base = { 0x78000, 0x78800, 0x79000 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x71000, 0x71800, 0x72000, 0x72800 },
+		.base = { 0x70000, 0x70800, 0x71000, 0x71800 },
 	},
 	.intf = {
-		.base = { 0x6b000, 0x6b800, 0x6c000, 0x6c800, 0x6d000 },
+		.base = { 0x6a000, 0x6a800, 0x6b000, 0x6b800, 0x6c000 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
@@ -387,19 +383,18 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	.name = "msm8x96",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
 		.caps = MDP_CAP_DSC |
 			MDP_CAP_CDM |
 			0,
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0xf4ffffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x05000, 0x07000, 0x09000, 0x0b000 },
+		.base = { 0x04000, 0x06000, 0x08000, 0x0a000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SCALE	|
@@ -410,7 +405,7 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x15000, 0x17000, 0x19000, 0x1b000 },
+		.base = { 0x14000, 0x16000, 0x18000, 0x1a000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SCALE	|
@@ -420,7 +415,7 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x25000, 0x27000 },
+		.base = { 0x24000, 0x26000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SW_PIX_EXT	|
@@ -428,33 +423,33 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x45000, 0x46000, 0x47000, 0x48000, 0x49000, 0x4a000 },
+		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
 		.nb_stages = 8,
 		.max_width = 2560,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 2,
-		.base = { 0x55000, 0x57000 },
+		.base = { 0x54000, 0x56000 },
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x79000, 0x79800, 0x7a000 },
+		.base = { 0x78000, 0x78800, 0x79000 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x71000, 0x71800, 0x72000, 0x72800 },
+		.base = { 0x70000, 0x70800, 0x71000, 0x71800 },
 	},
 	.cdm = {
 		.count = 1,
-		.base = { 0x7a200 },
+		.base = { 0x79200 },
 	},
 	.dsc = {
 		.count = 2,
-		.base = { 0x81000, 0x81400 },
+		.base = { 0x80000, 0x80400 },
 	},
 	.intf = {
-		.base = { 0x6b000, 0x6b800, 0x6c000, 0x6c800, 0x6d000 },
+		.base = { 0x6a000, 0x6a800, 0x6b000, 0x6b800, 0x6c000 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index 69094cb28103..c627ab6d0061 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -272,22 +272,22 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
 	 * start signal for the slave encoder
 	 */
 	if (intf_num == 1)
-		data |= MDP5_MDP_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX;
+		data |= MDP5_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX;
 	else if (intf_num == 2)
-		data |= MDP5_MDP_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX;
+		data |= MDP5_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX;
 	else
 		return -EINVAL;
 
 	/* Smart Panel, Sync mode */
-	data |= MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL;
+	data |= MDP5_SPLIT_DPL_UPPER_SMART_PANEL;
 
 	/* Make sure clocks are on when connectors calling this function. */
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_UPPER(0), data);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_UPPER, data);
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_LOWER(0),
-			MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_EN(0), 1);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER,
+		   MDP5_SPLIT_DPL_LOWER_SMART_PANEL);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1);
 	mdp5_disable(mdp5_kms);
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 4e8ed739f558..fa2be7ce9468 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -490,8 +490,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 	struct drm_gem_object *cursor_bo, *old_bo = NULL;
 	uint32_t blendcfg, cursor_addr, stride;
-	int ret, bpp, lm;
-	unsigned int depth;
+	int ret, lm;
 	enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL;
 	uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0);
 	uint32_t roi_w, roi_h;
@@ -521,8 +520,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 		return -EINVAL;
 
 	lm = mdp5_crtc->lm;
-	drm_fb_get_bpp_depth(DRM_FORMAT_ARGB8888, &depth, &bpp);
-	stride = width * (bpp >> 3);
+	stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0);
 
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
 	old_bo = mdp5_crtc->cursor.scanout_bo;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
index 4e81ca4f964a..d021edc3b307 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
@@ -118,31 +118,31 @@ static void set_display_intf(struct mdp5_kms *mdp5_kms,
 	u32 intf_sel;
 
 	spin_lock_irqsave(&mdp5_kms->resource_lock, flags);
-	intf_sel = mdp5_read(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0));
+	intf_sel = mdp5_read(mdp5_kms, REG_MDP5_DISP_INTF_SEL);
 
 	switch (intf->num) {
 	case 0:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF0__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF0(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF0__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF0(intf->type);
 		break;
 	case 1:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF1__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF1(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF1__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF1(intf->type);
 		break;
 	case 2:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF2__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF2(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF2__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF2(intf->type);
 		break;
 	case 3:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF3__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF3(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF3__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF3(intf->type);
 		break;
 	default:
 		BUG();
 		break;
 	}
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0), intf_sel);
+	mdp5_write(mdp5_kms, REG_MDP5_DISP_INTF_SEL, intf_sel);
 	spin_unlock_irqrestore(&mdp5_kms->resource_lock, flags);
 }
 
@@ -557,7 +557,7 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable)
 	if (!enable) {
 		ctlx->pair = NULL;
 		ctly->pair = NULL;
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SPARE_0(0), 0);
+		mdp5_write(mdp5_kms, REG_MDP5_SPARE_0, 0);
 		return 0;
 	} else if ((ctlx->pair != NULL) || (ctly->pair != NULL)) {
 		dev_err(ctl_mgr->dev->dev, "CTLs already paired\n");
@@ -570,8 +570,8 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable)
 	ctlx->pair = ctly;
 	ctly->pair = ctlx;
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPARE_0(0),
-		MDP5_MDP_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN);
+	mdp5_write(mdp5_kms, REG_MDP5_SPARE_0,
+		   MDP5_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 1d95f9fd9dc7..fe0c22230883 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -322,18 +322,18 @@ int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
 	 * to use the master's enable signal for the slave encoder.
 	 */
 	if (intf_num == 1)
-		data |= MDP5_MDP_SPLIT_DPL_LOWER_INTF2_TG_SYNC;
+		data |= MDP5_SPLIT_DPL_LOWER_INTF2_TG_SYNC;
 	else if (intf_num == 2)
-		data |= MDP5_MDP_SPLIT_DPL_LOWER_INTF1_TG_SYNC;
+		data |= MDP5_SPLIT_DPL_LOWER_INTF1_TG_SYNC;
 	else
 		return -EINVAL;
 
 	/* Make sure clocks are on when connectors calling this function. */
 	mdp5_enable(mdp5_kms);
 	/* Dumb Panel, Sync mode */
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_UPPER(0), 0);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_LOWER(0), data);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_EN(0), 1);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_UPPER, 0);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER, data);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1);
 
 	mdp5_ctl_pair(mdp5_encoder->ctl, mdp5_slave_enc->ctl, true);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
index 73bc3e312fd4..d53e5510fd7c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
@@ -15,7 +15,6 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/irqdomain.h>
 #include <linux/irq.h>
 
 #include "msm_drv.h"
@@ -24,9 +23,9 @@
 void mdp5_set_irqmask(struct mdp_kms *mdp_kms, uint32_t irqmask,
 		uint32_t old_irqmask)
 {
-	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_MDP_INTR_CLEAR(0),
-		irqmask ^ (irqmask & old_irqmask));
-	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_MDP_INTR_EN(0), irqmask);
+	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_INTR_CLEAR,
+		   irqmask ^ (irqmask & old_irqmask));
+	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_INTR_EN, irqmask);
 }
 
 static void mdp5_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus)
@@ -38,8 +37,8 @@ void mdp5_irq_preinstall(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_CLEAR(0), 0xffffffff);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_EN(0), 0x00000000);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000);
 	mdp5_disable(mdp5_kms);
 }
 
@@ -55,7 +54,9 @@ int mdp5_irq_postinstall(struct msm_kms *kms)
 			MDP5_IRQ_INTF2_UNDER_RUN |
 			MDP5_IRQ_INTF3_UNDER_RUN;
 
+	mdp5_enable(mdp5_kms);
 	mdp_irq_register(mdp_kms, error_handler);
+	mdp5_disable(mdp5_kms);
 
 	return 0;
 }
@@ -64,21 +65,22 @@ void mdp5_irq_uninstall(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_EN(0), 0x00000000);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000);
 	mdp5_disable(mdp5_kms);
 }
 
-static void mdp5_irq_mdp(struct mdp_kms *mdp_kms)
+irqreturn_t mdp5_irq(struct msm_kms *kms)
 {
+	struct mdp_kms *mdp_kms = to_mdp_kms(kms);
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(mdp_kms);
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	unsigned int id;
 	uint32_t status, enable;
 
-	enable = mdp5_read(mdp5_kms, REG_MDP5_MDP_INTR_EN(0));
-	status = mdp5_read(mdp5_kms, REG_MDP5_MDP_INTR_STATUS(0)) & enable;
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_CLEAR(0), status);
+	enable = mdp5_read(mdp5_kms, REG_MDP5_INTR_EN);
+	status = mdp5_read(mdp5_kms, REG_MDP5_INTR_STATUS) & enable;
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, status);
 
 	VERB("status=%08x", status);
 
@@ -87,29 +89,6 @@ static void mdp5_irq_mdp(struct mdp_kms *mdp_kms)
 	for (id = 0; id < priv->num_crtcs; id++)
 		if (status & mdp5_crtc_vblank(priv->crtcs[id]))
 			drm_handle_vblank(dev, id);
-}
-
-irqreturn_t mdp5_irq(struct msm_kms *kms)
-{
-	struct mdp_kms *mdp_kms = to_mdp_kms(kms);
-	struct mdp5_kms *mdp5_kms = to_mdp5_kms(mdp_kms);
-	uint32_t intr;
-
-	intr = mdp5_read(mdp5_kms, REG_MDSS_HW_INTR_STATUS);
-
-	VERB("intr=%08x", intr);
-
-	if (intr & MDSS_HW_INTR_STATUS_INTR_MDP) {
-		mdp5_irq_mdp(mdp_kms);
-		intr &= ~MDSS_HW_INTR_STATUS_INTR_MDP;
-	}
-
-	while (intr) {
-		irq_hw_number_t hwirq = fls(intr) - 1;
-		generic_handle_irq(irq_find_mapping(
-				mdp5_kms->irqcontroller.domain, hwirq));
-		intr &= ~(1 << hwirq);
-	}
 
 	return IRQ_HANDLED;
 }
@@ -135,81 +114,3 @@ void mdp5_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
 			mdp5_crtc_vblank(crtc), false);
 	mdp5_disable(mdp5_kms);
 }
-
-/*
- * interrupt-controller implementation, so sub-blocks (hdmi/eDP/dsi/etc)
- * can register to get their irq's delivered
- */
-
-#define VALID_IRQS  (MDSS_HW_INTR_STATUS_INTR_DSI0 | \
-		MDSS_HW_INTR_STATUS_INTR_DSI1 | \
-		MDSS_HW_INTR_STATUS_INTR_HDMI | \
-		MDSS_HW_INTR_STATUS_INTR_EDP)
-
-static void mdp5_hw_mask_irq(struct irq_data *irqd)
-{
-	struct mdp5_kms *mdp5_kms = irq_data_get_irq_chip_data(irqd);
-	smp_mb__before_atomic();
-	clear_bit(irqd->hwirq, &mdp5_kms->irqcontroller.enabled_mask);
-	smp_mb__after_atomic();
-}
-
-static void mdp5_hw_unmask_irq(struct irq_data *irqd)
-{
-	struct mdp5_kms *mdp5_kms = irq_data_get_irq_chip_data(irqd);
-	smp_mb__before_atomic();
-	set_bit(irqd->hwirq, &mdp5_kms->irqcontroller.enabled_mask);
-	smp_mb__after_atomic();
-}
-
-static struct irq_chip mdp5_hw_irq_chip = {
-	.name		= "mdp5",
-	.irq_mask	= mdp5_hw_mask_irq,
-	.irq_unmask	= mdp5_hw_unmask_irq,
-};
-
-static int mdp5_hw_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	struct mdp5_kms *mdp5_kms = d->host_data;
-
-	if (!(VALID_IRQS & (1 << hwirq)))
-		return -EPERM;
-
-	irq_set_chip_and_handler(irq, &mdp5_hw_irq_chip, handle_level_irq);
-	irq_set_chip_data(irq, mdp5_kms);
-
-	return 0;
-}
-
-static struct irq_domain_ops mdp5_hw_irqdomain_ops = {
-	.map = mdp5_hw_irqdomain_map,
-	.xlate = irq_domain_xlate_onecell,
-};
-
-
-int mdp5_irq_domain_init(struct mdp5_kms *mdp5_kms)
-{
-	struct device *dev = mdp5_kms->dev->dev;
-	struct irq_domain *d;
-
-	d = irq_domain_add_linear(dev->of_node, 32,
-			&mdp5_hw_irqdomain_ops, mdp5_kms);
-	if (!d) {
-		dev_err(dev, "mdp5 irq domain add failed\n");
-		return -ENXIO;
-	}
-
-	mdp5_kms->irqcontroller.enabled_mask = 0;
-	mdp5_kms->irqcontroller.domain = d;
-
-	return 0;
-}
-
-void mdp5_irq_domain_fini(struct mdp5_kms *mdp5_kms)
-{
-	if (mdp5_kms->irqcontroller.domain) {
-		irq_domain_remove(mdp5_kms->irqcontroller.domain);
-		mdp5_kms->irqcontroller.domain = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index f0c285b1c027..ed7143d35b25 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -16,6 +16,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/of_irq.h>
 
 #include "msm_drv.h"
 #include "msm_mmu.h"
@@ -28,10 +29,11 @@ static const char *iommu_ports[] = {
 static int mdp5_hw_init(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
-	struct drm_device *dev = mdp5_kms->dev;
+	struct platform_device *pdev = mdp5_kms->pdev;
 	unsigned long flags;
 
-	pm_runtime_get_sync(dev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+	mdp5_enable(mdp5_kms);
 
 	/* Magic unknown register writes:
 	 *
@@ -58,12 +60,13 @@ static int mdp5_hw_init(struct msm_kms *kms)
 	 */
 
 	spin_lock_irqsave(&mdp5_kms->resource_lock, flags);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0), 0);
+	mdp5_write(mdp5_kms, REG_MDP5_DISP_INTF_SEL, 0);
 	spin_unlock_irqrestore(&mdp5_kms->resource_lock, flags);
 
 	mdp5_ctlm_hw_reset(mdp5_kms->ctlm);
 
-	pm_runtime_put_sync(dev->dev);
+	mdp5_disable(mdp5_kms);
+	pm_runtime_put_sync(&pdev->dev);
 
 	return 0;
 }
@@ -111,26 +114,15 @@ static int mdp5_set_split_display(struct msm_kms *kms,
 		return mdp5_encoder_set_split_display(encoder, slave_encoder);
 }
 
-static void mdp5_destroy(struct msm_kms *kms)
+static void mdp5_kms_destroy(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	struct msm_mmu *mmu = mdp5_kms->mmu;
 
-	mdp5_irq_domain_fini(mdp5_kms);
-
 	if (mmu) {
 		mmu->funcs->detach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports));
 		mmu->funcs->destroy(mmu);
 	}
-
-	if (mdp5_kms->ctlm)
-		mdp5_ctlm_destroy(mdp5_kms->ctlm);
-	if (mdp5_kms->smp)
-		mdp5_smp_destroy(mdp5_kms->smp);
-	if (mdp5_kms->cfg)
-		mdp5_cfg_destroy(mdp5_kms->cfg);
-
-	kfree(mdp5_kms);
 }
 
 static const struct mdp_kms_funcs kms_funcs = {
@@ -148,7 +140,7 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
-		.destroy         = mdp5_destroy,
+		.destroy         = mdp5_kms_destroy,
 	},
 	.set_irqmask         = mdp5_set_irqmask,
 };
@@ -345,13 +337,6 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 
 	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
 
-	/* register our interrupt-controller for hdmi/eDP/dsi/etc
-	 * to use for irqs routed through mdp:
-	 */
-	ret = mdp5_irq_domain_init(mdp5_kms);
-	if (ret)
-		goto fail;
-
 	/* construct CRTCs and their private planes: */
 	for (i = 0; i < hw_cfg->pipe_rgb.count; i++) {
 		struct drm_plane *plane;
@@ -419,17 +404,17 @@ fail:
 	return ret;
 }
 
-static void read_hw_revision(struct mdp5_kms *mdp5_kms,
-		uint32_t *major, uint32_t *minor)
+static void read_mdp_hw_revision(struct mdp5_kms *mdp5_kms,
+				 u32 *major, u32 *minor)
 {
-	uint32_t version;
+	u32 version;
 
 	mdp5_enable(mdp5_kms);
-	version = mdp5_read(mdp5_kms, REG_MDSS_HW_VERSION);
+	version = mdp5_read(mdp5_kms, REG_MDP5_HW_VERSION);
 	mdp5_disable(mdp5_kms);
 
-	*major = FIELD(version, MDSS_HW_VERSION_MAJOR);
-	*minor = FIELD(version, MDSS_HW_VERSION_MINOR);
+	*major = FIELD(version, MDP5_HW_VERSION_MAJOR);
+	*minor = FIELD(version, MDP5_HW_VERSION_MINOR);
 
 	DBG("MDP5 version v%d.%d", *major, *minor);
 }
@@ -574,51 +559,146 @@ static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 
 struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 {
-	struct platform_device *pdev = dev->platformdev;
-	struct mdp5_cfg *config;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct platform_device *pdev;
 	struct mdp5_kms *mdp5_kms;
-	struct msm_kms *kms = NULL;
+	struct mdp5_cfg *config;
+	struct msm_kms *kms;
 	struct msm_mmu *mmu;
-	uint32_t major, minor;
-	int i, ret;
+	int irq, i, ret;
 
-	mdp5_kms = kzalloc(sizeof(*mdp5_kms), GFP_KERNEL);
-	if (!mdp5_kms) {
-		dev_err(dev->dev, "failed to allocate kms\n");
-		ret = -ENOMEM;
+	/* priv->kms would have been populated by the MDP5 driver */
+	kms = priv->kms;
+	if (!kms)
+		return NULL;
+
+	mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
+
+	mdp_kms_init(&mdp5_kms->base, &kms_funcs);
+
+	pdev = mdp5_kms->pdev;
+
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (irq < 0) {
+		ret = irq;
+		dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
 		goto fail;
 	}
 
-	spin_lock_init(&mdp5_kms->resource_lock);
+	kms->irq = irq;
 
-	mdp_kms_init(&mdp5_kms->base, &kms_funcs);
+	config = mdp5_cfg_get_config(mdp5_kms->cfg);
 
-	kms = &mdp5_kms->base.base;
+	/* make sure things are off before attaching iommu (bootloader could
+	 * have left things on, in which case we'll start getting faults if
+	 * we don't disable):
+	 */
+	mdp5_enable(mdp5_kms);
+	for (i = 0; i < MDP5_INTF_NUM_MAX; i++) {
+		if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) ||
+		    !config->hw->intf.base[i])
+			continue;
+		mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
 
-	mdp5_kms->dev = dev;
+		mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3);
+	}
+	mdp5_disable(mdp5_kms);
+	mdelay(16);
 
-	/* mdp5_kms->mmio actually represents the MDSS base address */
-	mdp5_kms->mmio = msm_ioremap(pdev, "mdp_phys", "MDP5");
-	if (IS_ERR(mdp5_kms->mmio)) {
-		ret = PTR_ERR(mdp5_kms->mmio);
+	if (config->platform.iommu) {
+		mmu = msm_iommu_new(&pdev->dev, config->platform.iommu);
+		if (IS_ERR(mmu)) {
+			ret = PTR_ERR(mmu);
+			dev_err(&pdev->dev, "failed to init iommu: %d\n", ret);
+			iommu_domain_free(config->platform.iommu);
+			goto fail;
+		}
+
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret) {
+			dev_err(&pdev->dev, "failed to attach iommu: %d\n",
+				ret);
+			mmu->funcs->destroy(mmu);
+			goto fail;
+		}
+	} else {
+		dev_info(&pdev->dev,
+			 "no iommu, fallback to phys contig buffers for scanout\n");
+		mmu = NULL;
+	}
+	mdp5_kms->mmu = mmu;
+
+	mdp5_kms->id = msm_register_mmu(dev, mmu);
+	if (mdp5_kms->id < 0) {
+		ret = mdp5_kms->id;
+		dev_err(&pdev->dev, "failed to register mdp5 iommu: %d\n", ret);
 		goto fail;
 	}
 
-	mdp5_kms->vbif = msm_ioremap(pdev, "vbif_phys", "VBIF");
-	if (IS_ERR(mdp5_kms->vbif)) {
-		ret = PTR_ERR(mdp5_kms->vbif);
+	ret = modeset_init(mdp5_kms);
+	if (ret) {
+		dev_err(&pdev->dev, "modeset_init failed: %d\n", ret);
 		goto fail;
 	}
 
-	mdp5_kms->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(mdp5_kms->vdd)) {
-		ret = PTR_ERR(mdp5_kms->vdd);
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	dev->mode_config.max_width = config->hw->lm.max_width;
+	dev->mode_config.max_height = config->hw->lm.max_height;
+
+	dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp;
+	dev->driver->get_scanout_position = mdp5_get_scanoutpos;
+	dev->driver->get_vblank_counter = mdp5_get_vblank_counter;
+	dev->max_vblank_count = 0xffffffff;
+	dev->vblank_disable_immediate = true;
+
+	return kms;
+fail:
+	if (kms)
+		mdp5_kms_destroy(kms);
+	return ERR_PTR(ret);
+}
+
+static void mdp5_destroy(struct platform_device *pdev)
+{
+	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
+
+	if (mdp5_kms->ctlm)
+		mdp5_ctlm_destroy(mdp5_kms->ctlm);
+	if (mdp5_kms->smp)
+		mdp5_smp_destroy(mdp5_kms->smp);
+	if (mdp5_kms->cfg)
+		mdp5_cfg_destroy(mdp5_kms->cfg);
+
+	if (mdp5_kms->rpm_enabled)
+		pm_runtime_disable(&pdev->dev);
+}
+
+static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct mdp5_kms *mdp5_kms;
+	struct mdp5_cfg *config;
+	u32 major, minor;
+	int ret;
+
+	mdp5_kms = devm_kzalloc(&pdev->dev, sizeof(*mdp5_kms), GFP_KERNEL);
+	if (!mdp5_kms) {
+		ret = -ENOMEM;
 		goto fail;
 	}
 
-	ret = regulator_enable(mdp5_kms->vdd);
-	if (ret) {
-		dev_err(dev->dev, "failed to enable regulator vdd: %d\n", ret);
+	platform_set_drvdata(pdev, mdp5_kms);
+
+	spin_lock_init(&mdp5_kms->resource_lock);
+
+	mdp5_kms->dev = dev;
+	mdp5_kms->pdev = pdev;
+
+	mdp5_kms->mmio = msm_ioremap(pdev, "mdp_phys", "MDP5");
+	if (IS_ERR(mdp5_kms->mmio)) {
+		ret = PTR_ERR(mdp5_kms->mmio);
 		goto fail;
 	}
 
@@ -629,9 +709,6 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src", true);
-	if (ret)
-		goto fail;
 	ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk", true);
 	if (ret)
 		goto fail;
@@ -646,9 +723,12 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	 * rate first, then figure out hw revision, and then set a
 	 * more optimal rate:
 	 */
-	clk_set_rate(mdp5_kms->src_clk, 200000000);
+	clk_set_rate(mdp5_kms->core_clk, 200000000);
+
+	pm_runtime_enable(&pdev->dev);
+	mdp5_kms->rpm_enabled = true;
 
-	read_hw_revision(mdp5_kms, &major, &minor);
+	read_mdp_hw_revision(mdp5_kms, &major, &minor);
 
 	mdp5_kms->cfg = mdp5_cfg_init(mdp5_kms, major, minor);
 	if (IS_ERR(mdp5_kms->cfg)) {
@@ -661,7 +741,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	mdp5_kms->caps = config->hw->mdp.caps;
 
 	/* TODO: compute core clock rate at runtime */
-	clk_set_rate(mdp5_kms->src_clk, config->hw->max_clk);
+	clk_set_rate(mdp5_kms->core_clk, config->hw->max_clk);
 
 	/*
 	 * Some chipsets have a Shared Memory Pool (SMP), while others
@@ -684,73 +764,76 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	/* make sure things are off before attaching iommu (bootloader could
-	 * have left things on, in which case we'll start getting faults if
-	 * we don't disable):
-	 */
-	mdp5_enable(mdp5_kms);
-	for (i = 0; i < MDP5_INTF_NUM_MAX; i++) {
-		if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) ||
-				!config->hw->intf.base[i])
-			continue;
-		mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
+	/* set uninit-ed kms */
+	priv->kms = &mdp5_kms->base.base;
 
-		mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3);
-	}
-	mdp5_disable(mdp5_kms);
-	mdelay(16);
+	return 0;
+fail:
+	mdp5_destroy(pdev);
+	return ret;
+}
 
-	if (config->platform.iommu) {
-		mmu = msm_iommu_new(&pdev->dev, config->platform.iommu);
-		if (IS_ERR(mmu)) {
-			ret = PTR_ERR(mmu);
-			dev_err(dev->dev, "failed to init iommu: %d\n", ret);
-			iommu_domain_free(config->platform.iommu);
-			goto fail;
-		}
+static int mdp5_bind(struct device *dev, struct device *master, void *data)
+{
+	struct drm_device *ddev = dev_get_drvdata(master);
+	struct platform_device *pdev = to_platform_device(dev);
 
-		ret = mmu->funcs->attach(mmu, iommu_ports,
-				ARRAY_SIZE(iommu_ports));
-		if (ret) {
-			dev_err(dev->dev, "failed to attach iommu: %d\n", ret);
-			mmu->funcs->destroy(mmu);
-			goto fail;
-		}
-	} else {
-		dev_info(dev->dev, "no iommu, fallback to phys "
-				"contig buffers for scanout\n");
-		mmu = NULL;
-	}
-	mdp5_kms->mmu = mmu;
+	DBG("");
 
-	mdp5_kms->id = msm_register_mmu(dev, mmu);
-	if (mdp5_kms->id < 0) {
-		ret = mdp5_kms->id;
-		dev_err(dev->dev, "failed to register mdp5 iommu: %d\n", ret);
-		goto fail;
-	}
+	return mdp5_init(pdev, ddev);
+}
 
-	ret = modeset_init(mdp5_kms);
-	if (ret) {
-		dev_err(dev->dev, "modeset_init failed: %d\n", ret);
-		goto fail;
-	}
+static void mdp5_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
 
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-	dev->mode_config.max_width = config->hw->lm.max_width;
-	dev->mode_config.max_height = config->hw->lm.max_height;
+	mdp5_destroy(pdev);
+}
 
-	dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp;
-	dev->driver->get_scanout_position = mdp5_get_scanoutpos;
-	dev->driver->get_vblank_counter = mdp5_get_vblank_counter;
-	dev->max_vblank_count = 0xffffffff;
-	dev->vblank_disable_immediate = true;
+static const struct component_ops mdp5_ops = {
+	.bind   = mdp5_bind,
+	.unbind = mdp5_unbind,
+};
 
-	return kms;
+static int mdp5_dev_probe(struct platform_device *pdev)
+{
+	DBG("");
+	return component_add(&pdev->dev, &mdp5_ops);
+}
 
-fail:
-	if (kms)
-		mdp5_destroy(kms);
-	return ERR_PTR(ret);
+static int mdp5_dev_remove(struct platform_device *pdev)
+{
+	DBG("");
+	component_del(&pdev->dev, &mdp5_ops);
+	return 0;
+}
+
+static const struct of_device_id mdp5_dt_match[] = {
+	{ .compatible = "qcom,mdp5", },
+	/* to support downstream DT files */
+	{ .compatible = "qcom,mdss_mdp", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mdp5_dt_match);
+
+static struct platform_driver mdp5_driver = {
+	.probe = mdp5_dev_probe,
+	.remove = mdp5_dev_remove,
+	.driver = {
+		.name = "msm_mdp",
+		.of_match_table = mdp5_dt_match,
+	},
+};
+
+void __init msm_mdp_register(void)
+{
+	DBG("");
+	platform_driver_register(&mdp5_driver);
+}
+
+void __exit msm_mdp_unregister(void)
+{
+	DBG("");
+	platform_driver_unregister(&mdp5_driver);
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 9a25898239d3..03738927be10 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -31,6 +31,8 @@ struct mdp5_kms {
 
 	struct drm_device *dev;
 
+	struct platform_device *pdev;
+
 	struct mdp5_cfg_handler *cfg;
 	uint32_t caps;	/* MDP capabilities (MDP_CAP_XXX bits) */
 
@@ -43,29 +45,23 @@ struct mdp5_kms {
 	struct mdp5_ctl_manager *ctlm;
 
 	/* io/register spaces: */
-	void __iomem *mmio, *vbif;
-
-	struct regulator *vdd;
+	void __iomem *mmio;
 
 	struct clk *axi_clk;
 	struct clk *ahb_clk;
-	struct clk *src_clk;
 	struct clk *core_clk;
 	struct clk *lut_clk;
 	struct clk *vsync_clk;
 
 	/*
 	 * lock to protect access to global resources: ie., following register:
-	 *	- REG_MDP5_MDP_DISP_INTF_SEL
+	 *	- REG_MDP5_DISP_INTF_SEL
 	 */
 	spinlock_t resource_lock;
 
-	struct mdp_irq error_handler;
+	bool rpm_enabled;
 
-	struct {
-		volatile unsigned long enabled_mask;
-		struct irq_domain *domain;
-	} irqcontroller;
+	struct mdp_irq error_handler;
 };
 #define to_mdp5_kms(x) container_of(x, struct mdp5_kms, base)
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
new file mode 100644
index 000000000000..d444a6901fff
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+#include "msm_drv.h"
+#include "mdp5_kms.h"
+
+/*
+ * If needed, this can become more specific: something like struct mdp5_mdss,
+ * which contains a 'struct msm_mdss base' member.
+ */
+struct msm_mdss {
+	struct drm_device *dev;
+
+	void __iomem *mmio, *vbif;
+
+	struct regulator *vdd;
+
+	struct {
+		volatile unsigned long enabled_mask;
+		struct irq_domain *domain;
+	} irqcontroller;
+};
+
+static inline void mdss_write(struct msm_mdss *mdss, u32 reg, u32 data)
+{
+	msm_writel(data, mdss->mmio + reg);
+}
+
+static inline u32 mdss_read(struct msm_mdss *mdss, u32 reg)
+{
+	return msm_readl(mdss->mmio + reg);
+}
+
+static irqreturn_t mdss_irq(int irq, void *arg)
+{
+	struct msm_mdss *mdss = arg;
+	u32 intr;
+
+	intr = mdss_read(mdss, REG_MDSS_HW_INTR_STATUS);
+
+	VERB("intr=%08x", intr);
+
+	while (intr) {
+		irq_hw_number_t hwirq = fls(intr) - 1;
+
+		generic_handle_irq(irq_find_mapping(
+				mdss->irqcontroller.domain, hwirq));
+		intr &= ~(1 << hwirq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * interrupt-controller implementation, so sub-blocks (MDP/HDMI/eDP/DSI/etc)
+ * can register to get their irq's delivered
+ */
+
+#define VALID_IRQS  (MDSS_HW_INTR_STATUS_INTR_MDP | \
+		MDSS_HW_INTR_STATUS_INTR_DSI0 | \
+		MDSS_HW_INTR_STATUS_INTR_DSI1 | \
+		MDSS_HW_INTR_STATUS_INTR_HDMI | \
+		MDSS_HW_INTR_STATUS_INTR_EDP)
+
+static void mdss_hw_mask_irq(struct irq_data *irqd)
+{
+	struct msm_mdss *mdss = irq_data_get_irq_chip_data(irqd);
+
+	smp_mb__before_atomic();
+	clear_bit(irqd->hwirq, &mdss->irqcontroller.enabled_mask);
+	smp_mb__after_atomic();
+}
+
+static void mdss_hw_unmask_irq(struct irq_data *irqd)
+{
+	struct msm_mdss *mdss = irq_data_get_irq_chip_data(irqd);
+
+	smp_mb__before_atomic();
+	set_bit(irqd->hwirq, &mdss->irqcontroller.enabled_mask);
+	smp_mb__after_atomic();
+}
+
+static struct irq_chip mdss_hw_irq_chip = {
+	.name		= "mdss",
+	.irq_mask	= mdss_hw_mask_irq,
+	.irq_unmask	= mdss_hw_unmask_irq,
+};
+
+static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq,
+				 irq_hw_number_t hwirq)
+{
+	struct msm_mdss *mdss = d->host_data;
+
+	if (!(VALID_IRQS & (1 << hwirq)))
+		return -EPERM;
+
+	irq_set_chip_and_handler(irq, &mdss_hw_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, mdss);
+
+	return 0;
+}
+
+static struct irq_domain_ops mdss_hw_irqdomain_ops = {
+	.map = mdss_hw_irqdomain_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+
+static int mdss_irq_domain_init(struct msm_mdss *mdss)
+{
+	struct device *dev = mdss->dev->dev;
+	struct irq_domain *d;
+
+	d = irq_domain_add_linear(dev->of_node, 32, &mdss_hw_irqdomain_ops,
+				  mdss);
+	if (!d) {
+		dev_err(dev, "mdss irq domain add failed\n");
+		return -ENXIO;
+	}
+
+	mdss->irqcontroller.enabled_mask = 0;
+	mdss->irqcontroller.domain = d;
+
+	return 0;
+}
+
+void msm_mdss_destroy(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_mdss *mdss = priv->mdss;
+
+	if (!mdss)
+		return;
+
+	irq_domain_remove(mdss->irqcontroller.domain);
+	mdss->irqcontroller.domain = NULL;
+
+	regulator_disable(mdss->vdd);
+
+	pm_runtime_put_sync(dev->dev);
+
+	pm_runtime_disable(dev->dev);
+}
+
+int msm_mdss_init(struct drm_device *dev)
+{
+	struct platform_device *pdev = dev->platformdev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_mdss *mdss;
+	int ret;
+
+	DBG("");
+
+	if (!of_device_is_compatible(dev->dev->of_node, "qcom,mdss"))
+		return 0;
+
+	mdss = devm_kzalloc(dev->dev, sizeof(*mdss), GFP_KERNEL);
+	if (!mdss) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mdss->dev = dev;
+
+	mdss->mmio = msm_ioremap(pdev, "mdss_phys", "MDSS");
+	if (IS_ERR(mdss->mmio)) {
+		ret = PTR_ERR(mdss->mmio);
+		goto fail;
+	}
+
+	mdss->vbif = msm_ioremap(pdev, "vbif_phys", "VBIF");
+	if (IS_ERR(mdss->vbif)) {
+		ret = PTR_ERR(mdss->vbif);
+		goto fail;
+	}
+
+	/* Regulator to enable GDSCs in downstream kernels */
+	mdss->vdd = devm_regulator_get(dev->dev, "vdd");
+	if (IS_ERR(mdss->vdd)) {
+		ret = PTR_ERR(mdss->vdd);
+		goto fail;
+	}
+
+	ret = regulator_enable(mdss->vdd);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable regulator vdd: %d\n",
+			ret);
+		goto fail;
+	}
+
+	ret = devm_request_irq(dev->dev, platform_get_irq(pdev, 0),
+			       mdss_irq, 0, "mdss_isr", mdss);
+	if (ret) {
+		dev_err(dev->dev, "failed to init irq: %d\n", ret);
+		goto fail_irq;
+	}
+
+	ret = mdss_irq_domain_init(mdss);
+	if (ret) {
+		dev_err(dev->dev, "failed to init sub-block irqs: %d\n", ret);
+		goto fail_irq;
+	}
+
+	priv->mdss = mdss;
+
+	pm_runtime_enable(dev->dev);
+
+	/*
+	 * TODO: This is needed as the MDSS GDSC is only tied to MDSS's power
+	 * domain. Remove this once runtime PM is adapted for all the devices.
+	 */
+	pm_runtime_get_sync(dev->dev);
+
+	return 0;
+fail_irq:
+	regulator_disable(mdss->vdd);
+fail:
+	return ret;
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
index 6f425c25d9fe..27d7b55b52c9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
@@ -42,7 +42,7 @@
  *
  *     configured:
  *     The block is allocated to some client, and assigned to that
- *     client in MDP5_MDP_SMP_ALLOC registers.
+ *     client in MDP5_SMP_ALLOC registers.
  *
  *     inuse:
  *     The block is being actively used by a client.
@@ -59,7 +59,7 @@
  *     mdp5_smp_commit.
  *
  *  2) mdp5_smp_configure():
- *     As hw is programmed, before FLUSH, MDP5_MDP_SMP_ALLOC registers
+ *     As hw is programmed, before FLUSH, MDP5_SMP_ALLOC registers
  *     are configured for the union(pending, inuse)
  *     Current pending is copied to configured.
  *     It is assumed that mdp5_smp_request and mdp5_smp_configure not run
@@ -311,25 +311,25 @@ static void update_smp_state(struct mdp5_smp *smp,
 		int idx = blk / 3;
 		int fld = blk % 3;
 
-		val = mdp5_read(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_W_REG(0, idx));
+		val = mdp5_read(mdp5_kms, REG_MDP5_SMP_ALLOC_W_REG(idx));
 
 		switch (fld) {
 		case 0:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT0(cid);
 			break;
 		case 1:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT1(cid);
 			break;
 		case 2:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT2(cid);
 			break;
 		}
 
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_W_REG(0, idx), val);
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_R_REG(0, idx), val);
+		mdp5_write(mdp5_kms, REG_MDP5_SMP_ALLOC_W_REG(idx), val);
+		mdp5_write(mdp5_kms, REG_MDP5_SMP_ALLOC_R_REG(idx), val);
 	}
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index a02dc2b27739..26f859ec24b3 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -21,6 +21,16 @@
 #include "msm_gpu.h"
 #include "msm_kms.h"
 
+
+/*
+ * MSM driver version:
+ * - 1.0.0 - initial interface
+ * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers
+ */
+#define MSM_VERSION_MAJOR	1
+#define MSM_VERSION_MINOR	1
+#define MSM_VERSION_PATCHLEVEL	0
+
 static void msm_fb_output_poll_changed(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -195,6 +205,8 @@ static int msm_drm_uninit(struct device *dev)
 		kfree(vbl_ev);
 	}
 
+	msm_gem_shrinker_cleanup(ddev);
+
 	drm_kms_helper_poll_fini(ddev);
 
 	drm_dev_unregister(ddev);
@@ -215,10 +227,8 @@ static int msm_drm_uninit(struct device *dev)
 	flush_workqueue(priv->atomic_wq);
 	destroy_workqueue(priv->atomic_wq);
 
-	if (kms) {
-		pm_runtime_disable(dev);
+	if (kms)
 		kms->funcs->destroy(kms);
-	}
 
 	if (gpu) {
 		mutex_lock(&ddev->struct_mutex);
@@ -237,6 +247,8 @@ static int msm_drm_uninit(struct device *dev)
 
 	component_unbind_all(dev, ddev);
 
+	msm_mdss_destroy(ddev);
+
 	ddev->dev_private = NULL;
 	drm_dev_unref(ddev);
 
@@ -282,6 +294,7 @@ static int msm_init_vram(struct drm_device *dev)
 	if (node) {
 		struct resource r;
 		ret = of_address_to_resource(node, 0, &r);
+		of_node_put(node);
 		if (ret)
 			return ret;
 		size = r.end - r.start;
@@ -350,6 +363,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	}
 
 	ddev->dev_private = priv;
+	priv->dev = ddev;
+
+	ret = msm_mdss_init(ddev);
+	if (ret) {
+		kfree(priv);
+		drm_dev_unref(ddev);
+		return ret;
+	}
 
 	priv->wq = alloc_ordered_workqueue("msm", 0);
 	priv->atomic_wq = alloc_ordered_workqueue("msm:atomic", 0);
@@ -365,6 +386,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	/* Bind all our sub-components: */
 	ret = component_bind_all(dev, ddev);
 	if (ret) {
+		msm_mdss_destroy(ddev);
 		kfree(priv);
 		drm_dev_unref(ddev);
 		return ret;
@@ -374,9 +396,12 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	if (ret)
 		goto fail;
 
+	msm_gem_shrinker_init(ddev);
+
 	switch (get_mdp_ver(pdev)) {
 	case 4:
 		kms = mdp4_kms_init(ddev);
+		priv->kms = kms;
 		break;
 	case 5:
 		kms = mdp5_kms_init(ddev);
@@ -398,10 +423,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		goto fail;
 	}
 
-	priv->kms = kms;
-
 	if (kms) {
-		pm_runtime_enable(dev);
 		ret = kms->funcs->hw_init(kms);
 		if (ret) {
 			dev_err(dev, "kms hw init failed: %d\n", ret);
@@ -417,12 +439,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		goto fail;
 	}
 
-	pm_runtime_get_sync(dev);
-	ret = drm_irq_install(ddev, platform_get_irq(pdev, 0));
-	pm_runtime_put_sync(dev);
-	if (ret < 0) {
-		dev_err(dev, "failed to install IRQ handler\n");
-		goto fail;
+	if (kms) {
+		pm_runtime_get_sync(dev);
+		ret = drm_irq_install(ddev, kms->irq);
+		pm_runtime_put_sync(dev);
+		if (ret < 0) {
+			dev_err(dev, "failed to install IRQ handler\n");
+			goto fail;
+		}
 	}
 
 	ret = drm_dev_register(ddev, 0);
@@ -682,6 +706,44 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 	return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true);
 }
 
+static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_madvise *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	switch (args->madv) {
+	case MSM_MADV_DONTNEED:
+	case MSM_MADV_WILLNEED:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	ret = msm_gem_madvise(obj, args->madv);
+	if (ret >= 0) {
+		args->retained = ret;
+		ret = 0;
+	}
+
+	drm_gem_object_unreference(obj);
+
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
 static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GET_PARAM,    msm_ioctl_get_param,    DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_GEM_NEW,      msm_ioctl_gem_new,      DRM_AUTH|DRM_RENDER_ALLOW),
@@ -690,6 +752,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_FINI, msm_ioctl_gem_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT,   msm_ioctl_gem_submit,   DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE,   msm_ioctl_wait_fence,   DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  msm_ioctl_gem_madvise,  DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
 static const struct vm_operations_struct vm_ops = {
@@ -755,8 +818,9 @@ static struct drm_driver msm_driver = {
 	.name               = "msm",
 	.desc               = "MSM Snapdragon DRM",
 	.date               = "20130625",
-	.major              = 1,
-	.minor              = 0,
+	.major              = MSM_VERSION_MAJOR,
+	.minor              = MSM_VERSION_MINOR,
+	.patchlevel         = MSM_VERSION_PATCHLEVEL,
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -796,22 +860,146 @@ static int compare_of(struct device *dev, void *data)
 	return dev->of_node == data;
 }
 
-static int add_components(struct device *dev, struct component_match **matchptr,
-		const char *name)
+/*
+ * Identify what components need to be added by parsing what remote-endpoints
+ * our MDP output ports are connected to. In the case of LVDS on MDP4, there
+ * is no external component that we need to add since LVDS is within MDP4
+ * itself.
+ */
+static int add_components_mdp(struct device *mdp_dev,
+			      struct component_match **matchptr)
+{
+	struct device_node *np = mdp_dev->of_node;
+	struct device_node *ep_node;
+	struct device *master_dev;
+
+	/*
+	 * on MDP4 based platforms, the MDP platform device is the component
+	 * master that adds other display interface components to itself.
+	 *
+	 * on MDP5 based platforms, the MDSS platform device is the component
+	 * master that adds MDP5 and other display interface components to
+	 * itself.
+	 */
+	if (of_device_is_compatible(np, "qcom,mdp4"))
+		master_dev = mdp_dev;
+	else
+		master_dev = mdp_dev->parent;
+
+	for_each_endpoint_of_node(np, ep_node) {
+		struct device_node *intf;
+		struct of_endpoint ep;
+		int ret;
+
+		ret = of_graph_parse_endpoint(ep_node, &ep);
+		if (ret) {
+			dev_err(mdp_dev, "unable to parse port endpoint\n");
+			of_node_put(ep_node);
+			return ret;
+		}
+
+		/*
+		 * The LCDC/LVDS port on MDP4 is a speacial case where the
+		 * remote-endpoint isn't a component that we need to add
+		 */
+		if (of_device_is_compatible(np, "qcom,mdp4") &&
+		    ep.port == 0) {
+			of_node_put(ep_node);
+			continue;
+		}
+
+		/*
+		 * It's okay if some of the ports don't have a remote endpoint
+		 * specified. It just means that the port isn't connected to
+		 * any external interface.
+		 */
+		intf = of_graph_get_remote_port_parent(ep_node);
+		if (!intf) {
+			of_node_put(ep_node);
+			continue;
+		}
+
+		component_match_add(master_dev, matchptr, compare_of, intf);
+
+		of_node_put(intf);
+		of_node_put(ep_node);
+	}
+
+	return 0;
+}
+
+static int compare_name_mdp(struct device *dev, void *data)
 {
-	struct device_node *np = dev->of_node;
-	unsigned i;
+	return (strstr(dev_name(dev), "mdp") != NULL);
+}
+
+static int add_display_components(struct device *dev,
+				  struct component_match **matchptr)
+{
+	struct device *mdp_dev;
+	int ret;
+
+	/*
+	 * MDP5 based devices don't have a flat hierarchy. There is a top level
+	 * parent: MDSS, and children: MDP5, DSI, HDMI, eDP etc. Populate the
+	 * children devices, find the MDP5 node, and then add the interfaces
+	 * to our components list.
+	 */
+	if (of_device_is_compatible(dev->of_node, "qcom,mdss")) {
+		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
+		if (ret) {
+			dev_err(dev, "failed to populate children devices\n");
+			return ret;
+		}
 
-	for (i = 0; ; i++) {
-		struct device_node *node;
+		mdp_dev = device_find_child(dev, NULL, compare_name_mdp);
+		if (!mdp_dev) {
+			dev_err(dev, "failed to find MDSS MDP node\n");
+			of_platform_depopulate(dev);
+			return -ENODEV;
+		}
 
-		node = of_parse_phandle(np, name, i);
-		if (!node)
-			break;
+		put_device(mdp_dev);
 
-		component_match_add(dev, matchptr, compare_of, node);
+		/* add the MDP component itself */
+		component_match_add(dev, matchptr, compare_of,
+				    mdp_dev->of_node);
+	} else {
+		/* MDP4 */
+		mdp_dev = dev;
 	}
 
+	ret = add_components_mdp(mdp_dev, matchptr);
+	if (ret)
+		of_platform_depopulate(dev);
+
+	return ret;
+}
+
+/*
+ * We don't know what's the best binding to link the gpu with the drm device.
+ * Fow now, we just hunt for all the possible gpus that we support, and add them
+ * as components.
+ */
+static const struct of_device_id msm_gpu_match[] = {
+	{ .compatible = "qcom,adreno-3xx" },
+	{ .compatible = "qcom,kgsl-3d0" },
+	{ },
+};
+
+static int add_gpu_components(struct device *dev,
+			      struct component_match **matchptr)
+{
+	struct device_node *np;
+
+	np = of_find_matching_node(NULL, msm_gpu_match);
+	if (!np)
+		return 0;
+
+	component_match_add(dev, matchptr, compare_of, np);
+
+	of_node_put(np);
+
 	return 0;
 }
 
@@ -837,9 +1025,15 @@ static const struct component_master_ops msm_drm_ops = {
 static int msm_pdev_probe(struct platform_device *pdev)
 {
 	struct component_match *match = NULL;
+	int ret;
 
-	add_components(&pdev->dev, &match, "connectors");
-	add_components(&pdev->dev, &match, "gpus");
+	ret = add_display_components(&pdev->dev, &match);
+	if (ret)
+		return ret;
+
+	ret = add_gpu_components(&pdev->dev, &match);
+	if (ret)
+		return ret;
 
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	return component_master_add_with_match(&pdev->dev, &msm_drm_ops, match);
@@ -848,20 +1042,14 @@ static int msm_pdev_probe(struct platform_device *pdev)
 static int msm_pdev_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &msm_drm_ops);
+	of_platform_depopulate(&pdev->dev);
 
 	return 0;
 }
 
-static const struct platform_device_id msm_id[] = {
-	{ "mdp", 0 },
-	{ }
-};
-
 static const struct of_device_id dt_match[] = {
-	{ .compatible = "qcom,mdp4", .data = (void *) 4 },	/* mdp4 */
-	{ .compatible = "qcom,mdp5", .data = (void *) 5 },	/* mdp5 */
-	/* to support downstream DT files */
-	{ .compatible = "qcom,mdss_mdp", .data = (void *) 5 },  /* mdp5 */
+	{ .compatible = "qcom,mdp4", .data = (void *)4 },	/* MDP4 */
+	{ .compatible = "qcom,mdss", .data = (void *)5 },	/* MDP5 MDSS */
 	{}
 };
 MODULE_DEVICE_TABLE(of, dt_match);
@@ -874,12 +1062,12 @@ static struct platform_driver msm_platform_driver = {
 		.of_match_table = dt_match,
 		.pm     = &msm_pm_ops,
 	},
-	.id_table   = msm_id,
 };
 
 static int __init msm_drm_register(void)
 {
 	DBG("init");
+	msm_mdp_register();
 	msm_dsi_register();
 	msm_edp_register();
 	msm_hdmi_register();
@@ -895,6 +1083,7 @@ static void __exit msm_drm_unregister(void)
 	adreno_unregister();
 	msm_edp_unregister();
 	msm_dsi_unregister();
+	msm_mdp_unregister();
 }
 
 module_init(msm_drm_register);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 5b2963f32291..b4bc7f1ef717 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -46,6 +46,7 @@
 struct msm_kms;
 struct msm_gpu;
 struct msm_mmu;
+struct msm_mdss;
 struct msm_rd_state;
 struct msm_perf_state;
 struct msm_gem_submit;
@@ -77,11 +78,16 @@ struct msm_vblank_ctrl {
 
 struct msm_drm_private {
 
+	struct drm_device *dev;
+
 	struct msm_kms *kms;
 
 	/* subordinate devices, if present: */
 	struct platform_device *gpu_pdev;
 
+	/* top level MDSS wrapper device (for MDP5 only) */
+	struct msm_mdss *mdss;
+
 	/* possibly this should be in the kms component, but it is
 	 * shared by both mdp4 and mdp5..
 	 */
@@ -147,6 +153,9 @@ struct msm_drm_private {
 		struct drm_mm mm;
 	} vram;
 
+	struct notifier_block vmap_notifier;
+	struct shrinker shrinker;
+
 	struct msm_vblank_ctrl vblank_ctrl;
 };
 
@@ -165,6 +174,9 @@ void msm_gem_submit_free(struct msm_gem_submit *submit);
 int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file);
 
+void msm_gem_shrinker_init(struct drm_device *dev);
+void msm_gem_shrinker_cleanup(struct drm_device *dev);
+
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
 			struct vm_area_struct *vma);
 int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -189,8 +201,13 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 		struct dma_buf_attachment *attach, struct sg_table *sg);
 int msm_gem_prime_pin(struct drm_gem_object *obj);
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
-void *msm_gem_vaddr_locked(struct drm_gem_object *obj);
-void *msm_gem_vaddr(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr(struct drm_gem_object *obj);
+void msm_gem_put_vaddr_locked(struct drm_gem_object *obj);
+void msm_gem_put_vaddr(struct drm_gem_object *obj);
+int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
+void msm_gem_purge(struct drm_gem_object *obj);
+void msm_gem_vunmap(struct drm_gem_object *obj);
 int msm_gem_sync_object(struct drm_gem_object *obj,
 		struct msm_fence_context *fctx, bool exclusive);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
@@ -257,6 +274,9 @@ static inline int msm_dsi_modeset_init(struct msm_dsi *msm_dsi,
 }
 #endif
 
+void __init msm_mdp_register(void);
+void __exit msm_mdp_unregister(void);
+
 #ifdef CONFIG_DEBUG_FS
 void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 7919c24c6ddd..95cf8fe72ee5 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -49,8 +49,8 @@ static void msm_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	for (i = 0; i < n; i++) {
 		struct drm_gem_object *bo = msm_fb->planes[i];
-		if (bo)
-			drm_gem_object_unreference_unlocked(bo);
+
+		drm_gem_object_unreference_unlocked(bo);
 	}
 
 	kfree(msm_fb);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 1a061e3e8b9e..ffd4a338ca12 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -158,7 +158,11 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 
 	dev->mode_config.fb_base = paddr;
 
-	fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo);
+	fbi->screen_base = msm_gem_get_vaddr_locked(fbdev->bo);
+	if (IS_ERR(fbi->screen_base)) {
+		ret = PTR_ERR(fbi->screen_base);
+		goto fail_unlock;
+	}
 	fbi->screen_size = fbdev->bo->size;
 	fbi->fix.smem_start = paddr;
 	fbi->fix.smem_len = fbdev->bo->size;
@@ -247,6 +251,7 @@ void msm_fbdev_free(struct drm_device *dev)
 
 	/* this will free the backing object */
 	if (fbdev->fb) {
+		msm_gem_put_vaddr(fbdev->bo);
 		drm_framebuffer_unregister_private(fbdev->fb);
 		drm_framebuffer_remove(fbdev->fb);
 	}
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 7daf4054dd2b..6cd4af443139 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -276,6 +276,26 @@ uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj)
 	return offset;
 }
 
+static void
+put_iova(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int id;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
+		struct msm_mmu *mmu = priv->mmus[id];
+		if (mmu && msm_obj->domain[id].iova) {
+			uint32_t offset = msm_obj->domain[id].iova;
+			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
+			msm_obj->domain[id].iova = 0;
+		}
+	}
+}
+
 /* should be called under struct_mutex.. although it can be called
  * from atomic context without struct_mutex to acquire an extra
  * iova ref if you know one is already held.
@@ -388,7 +408,7 @@ fail:
 	return ret;
 }
 
-void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
+void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
@@ -398,19 +418,94 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
 			return ERR_CAST(pages);
 		msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
 				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+		if (msm_obj->vaddr == NULL)
+			return ERR_PTR(-ENOMEM);
 	}
+	msm_obj->vmap_count++;
 	return msm_obj->vaddr;
 }
 
-void *msm_gem_vaddr(struct drm_gem_object *obj)
+void *msm_gem_get_vaddr(struct drm_gem_object *obj)
 {
 	void *ret;
 	mutex_lock(&obj->dev->struct_mutex);
-	ret = msm_gem_vaddr_locked(obj);
+	ret = msm_gem_get_vaddr_locked(obj);
 	mutex_unlock(&obj->dev->struct_mutex);
 	return ret;
 }
 
+void msm_gem_put_vaddr_locked(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
+	WARN_ON(msm_obj->vmap_count < 1);
+	msm_obj->vmap_count--;
+}
+
+void msm_gem_put_vaddr(struct drm_gem_object *obj)
+{
+	mutex_lock(&obj->dev->struct_mutex);
+	msm_gem_put_vaddr_locked(obj);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+/* Update madvise status, returns true if not purged, else
+ * false or -errno.
+ */
+int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
+
+	if (msm_obj->madv != __MSM_MADV_PURGED)
+		msm_obj->madv = madv;
+
+	return (msm_obj->madv != __MSM_MADV_PURGED);
+}
+
+void msm_gem_purge(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!is_purgeable(msm_obj));
+	WARN_ON(obj->import_attach);
+
+	put_iova(obj);
+
+	msm_gem_vunmap(obj);
+
+	put_pages(obj);
+
+	msm_obj->madv = __MSM_MADV_PURGED;
+
+	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+	drm_gem_free_mmap_offset(obj);
+
+	/* Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1);
+
+	invalidate_mapping_pages(file_inode(obj->filp)->i_mapping,
+			0, (loff_t)-1);
+}
+
+void msm_gem_vunmap(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	if (!msm_obj->vaddr || WARN_ON(!is_vunmapable(msm_obj)))
+		return;
+
+	vunmap(msm_obj->vaddr);
+	msm_obj->vaddr = NULL;
+}
+
 /* must be called before _move_to_active().. */
 int msm_gem_sync_object(struct drm_gem_object *obj,
 		struct msm_fence_context *fctx, bool exclusive)
@@ -462,6 +557,7 @@ void msm_gem_move_to_active(struct drm_gem_object *obj,
 		struct msm_gpu *gpu, bool exclusive, struct fence *fence)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
 	msm_obj->gpu = gpu;
 	if (exclusive)
 		reservation_object_add_excl_fence(msm_obj->resv, fence);
@@ -530,13 +626,27 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	struct reservation_object_list *fobj;
 	struct fence *fence;
 	uint64_t off = drm_vma_node_start(&obj->vma_node);
+	const char *madv;
 
 	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
 
-	seq_printf(m, "%08x: %c %2d (%2d) %08llx %p %zu\n",
+	switch (msm_obj->madv) {
+	case __MSM_MADV_PURGED:
+		madv = " purged";
+		break;
+	case MSM_MADV_DONTNEED:
+		madv = " purgeable";
+		break;
+	case MSM_MADV_WILLNEED:
+	default:
+		madv = "";
+		break;
+	}
+
+	seq_printf(m, "%08x: %c %2d (%2d) %08llx %p %zu%s\n",
 			msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
 			obj->name, obj->refcount.refcount.counter,
-			off, msm_obj->vaddr, obj->size);
+			off, msm_obj->vaddr, obj->size, madv);
 
 	rcu_read_lock();
 	fobj = rcu_dereference(robj->fence);
@@ -576,9 +686,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 void msm_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	struct msm_drm_private *priv = obj->dev->dev_private;
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
-	int id;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -587,13 +695,7 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 
 	list_del(&msm_obj->mm_list);
 
-	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
-		struct msm_mmu *mmu = priv->mmus[id];
-		if (mmu && msm_obj->domain[id].iova) {
-			uint32_t offset = msm_obj->domain[id].iova;
-			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
-		}
-	}
+	put_iova(obj);
 
 	if (obj->import_attach) {
 		if (msm_obj->vaddr)
@@ -607,7 +709,7 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 
 		drm_prime_gem_destroy(obj, msm_obj->sgt);
 	} else {
-		vunmap(msm_obj->vaddr);
+		msm_gem_vunmap(obj);
 		put_pages(obj);
 	}
 
@@ -686,6 +788,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
 		msm_obj->vram_node = (void *)&msm_obj[1];
 
 	msm_obj->flags = flags;
+	msm_obj->madv = MSM_MADV_WILLNEED;
 
 	if (resv) {
 		msm_obj->resv = resv;
@@ -727,9 +830,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference(obj);
-
+	drm_gem_object_unreference(obj);
 	return ERR_PTR(ret);
 }
 
@@ -772,8 +873,6 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference_unlocked(obj);
-
+	drm_gem_object_unreference_unlocked(obj);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 9facd4b6ffd9..b2f13cfe945e 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -29,6 +29,16 @@ struct msm_gem_object {
 
 	uint32_t flags;
 
+	/**
+	 * Advice: are the backing pages purgeable?
+	 */
+	uint8_t madv;
+
+	/**
+	 * count of active vmap'ing
+	 */
+	uint8_t vmap_count;
+
 	/* And object is either:
 	 *  inactive - on priv->inactive_list
 	 *  active   - on one one of the gpu's active_list..  well, at
@@ -72,7 +82,16 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
 	return msm_obj->gpu != NULL;
 }
 
-#define MAX_CMDS 4
+static inline bool is_purgeable(struct msm_gem_object *msm_obj)
+{
+	return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt &&
+			!msm_obj->base.dma_buf && !msm_obj->base.import_attach;
+}
+
+static inline bool is_vunmapable(struct msm_gem_object *msm_obj)
+{
+	return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
+}
 
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
  * associated with the cmdstream submission for synchronization (and
@@ -95,7 +114,7 @@ struct msm_gem_submit {
 		uint32_t size;  /* in dwords */
 		uint32_t iova;
 		uint32_t idx;   /* cmdstream buffer idx in bos[] */
-	} cmd[MAX_CMDS];
+	} *cmd;  /* array of size nr_cmds */
 	struct {
 		uint32_t flags;
 		struct msm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 6b90890faffe..60bb290700ce 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -33,12 +33,12 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *msm_gem_prime_vmap(struct drm_gem_object *obj)
 {
-	return msm_gem_vaddr(obj);
+	return msm_gem_get_vaddr(obj);
 }
 
 void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 {
-	/* TODO msm_gem_vunmap() */
+	msm_gem_put_vaddr(obj);
 }
 
 int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
new file mode 100644
index 000000000000..283d2841ba58
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2016 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_gem.h"
+
+static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+{
+	if (!mutex_is_locked(mutex))
+		return false;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+	return mutex->owner == task;
+#else
+	/* Since UP may be pre-empted, we cannot assume that we own the lock */
+	return false;
+#endif
+}
+
+static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+{
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return false;
+		*unlock = false;
+	} else {
+		*unlock = true;
+	}
+
+	return true;
+}
+
+
+static unsigned long
+msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct msm_drm_private *priv =
+		container_of(shrinker, struct msm_drm_private, shrinker);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned long count = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return 0;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (is_purgeable(msm_obj))
+			count += msm_obj->base.size >> PAGE_SHIFT;
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	return count;
+}
+
+static unsigned long
+msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct msm_drm_private *priv =
+		container_of(shrinker, struct msm_drm_private, shrinker);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned long freed = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return SHRINK_STOP;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (freed >= sc->nr_to_scan)
+			break;
+		if (is_purgeable(msm_obj)) {
+			msm_gem_purge(&msm_obj->base);
+			freed += msm_obj->base.size >> PAGE_SHIFT;
+		}
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	if (freed > 0)
+		pr_info_ratelimited("Purging %lu bytes\n", freed << PAGE_SHIFT);
+
+	return freed;
+}
+
+static int
+msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct msm_drm_private *priv =
+		container_of(nb, struct msm_drm_private, vmap_notifier);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned unmapped = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return NOTIFY_DONE;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (is_vunmapable(msm_obj)) {
+			msm_gem_vunmap(&msm_obj->base);
+			/* since we don't know any better, lets bail after a few
+			 * and if necessary the shrinker will be invoked again.
+			 * Seems better than unmapping *everything*
+			 */
+			if (++unmapped >= 15)
+				break;
+		}
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	*(unsigned long *)ptr += unmapped;
+
+	if (unmapped > 0)
+		pr_info_ratelimited("Purging %u vmaps\n", unmapped);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * msm_gem_shrinker_init - Initialize msm shrinker
+ * @dev_priv: msm device
+ *
+ * This function registers and sets up the msm shrinker.
+ */
+void msm_gem_shrinker_init(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	priv->shrinker.count_objects = msm_gem_shrinker_count;
+	priv->shrinker.scan_objects = msm_gem_shrinker_scan;
+	priv->shrinker.seeks = DEFAULT_SEEKS;
+	WARN_ON(register_shrinker(&priv->shrinker));
+
+	priv->vmap_notifier.notifier_call = msm_gem_shrinker_vmap;
+	WARN_ON(register_vmap_purge_notifier(&priv->vmap_notifier));
+}
+
+/**
+ * msm_gem_shrinker_cleanup - Clean up msm shrinker
+ * @dev_priv: msm device
+ *
+ * This function unregisters the msm shrinker.
+ */
+void msm_gem_shrinker_cleanup(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier));
+	unregister_shrinker(&priv->shrinker);
+}
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b89ca5174863..9766f9ae4b7d 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -29,10 +29,11 @@
 #define BO_PINNED   0x2000
 
 static struct msm_gem_submit *submit_create(struct drm_device *dev,
-		struct msm_gpu *gpu, int nr)
+		struct msm_gpu *gpu, int nr_bos, int nr_cmds)
 {
 	struct msm_gem_submit *submit;
-	int sz = sizeof(*submit) + (nr * sizeof(submit->bos[0]));
+	int sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) +
+			(nr_cmds * sizeof(*submit->cmd));
 
 	submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	if (!submit)
@@ -40,12 +41,15 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 	submit->dev = dev;
 	submit->gpu = gpu;
+	submit->fence = NULL;
 	submit->pid = get_pid(task_pid(current));
+	submit->cmd = (void *)&submit->bos[nr_bos];
 
 	/* initially, until copy_from_user() and bo lookup succeeds: */
 	submit->nr_bos = 0;
 	submit->nr_cmds = 0;
 
+	INIT_LIST_HEAD(&submit->node);
 	INIT_LIST_HEAD(&submit->bo_list);
 	ww_acquire_init(&submit->ticket, &reservation_ww_class);
 
@@ -75,6 +79,11 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		void __user *userptr =
 			u64_to_user_ptr(args->bos + (i * sizeof(submit_bo)));
 
+		/* make sure we don't have garbage flags, in case we hit
+		 * error path before flags is initialized:
+		 */
+		submit->bos[i].flags = 0;
+
 		ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
 		if (ret) {
 			ret = -EFAULT;
@@ -272,7 +281,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	/* For now, just map the entire thing.  Eventually we probably
 	 * to do it page-by-page, w/ kmap() if not vmap()d..
 	 */
-	ptr = msm_gem_vaddr_locked(&obj->base);
+	ptr = msm_gem_get_vaddr_locked(&obj->base);
 
 	if (IS_ERR(ptr)) {
 		ret = PTR_ERR(ptr);
@@ -325,6 +334,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		last_offset = off;
 	}
 
+	msm_gem_put_vaddr_locked(&obj->base);
+
 	return 0;
 }
 
@@ -362,14 +373,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (args->pipe != MSM_PIPE_3D0)
 		return -EINVAL;
 
-	if (args->nr_cmds > MAX_CMDS)
-		return -EINVAL;
-
-	submit = submit_create(dev, gpu, args->nr_bos);
-	if (!submit)
-		return -ENOMEM;
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-	mutex_lock(&dev->struct_mutex);
+	submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
+	if (!submit) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	ret = submit_lookup_objects(submit, args, file);
 	if (ret)
@@ -455,6 +467,7 @@ out:
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
+out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index a7a0b6d9b057..3a294d0da3a0 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -59,10 +59,10 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova,
 		return -EINVAL;
 
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		u32 pa = sg_phys(sg) - sg->offset;
+		dma_addr_t pa = sg_phys(sg) - sg->offset;
 		size_t bytes = sg->length + sg->offset;
 
-		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
+		VERB("map[%d]: %08x %08lx(%zx)", i, da, (unsigned long)pa, bytes);
 
 		ret = iommu_map(domain, da, pa, bytes, prot);
 		if (ret)
@@ -101,7 +101,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova,
 		if (unmapped < bytes)
 			return unmapped;
 
-		VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);
+		VERB("unmap[%d]: %08x(%zx)", i, da, bytes);
 
 		BUG_ON(!PAGE_ALIGNED(bytes));
 
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index e32222c3d44f..40e41e5cdbc6 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -61,10 +61,8 @@ struct msm_kms_funcs {
 struct msm_kms {
 	const struct msm_kms_funcs *funcs;
 
-	/* irq handling: */
-	bool in_irq;
-	struct list_head irq_list;    /* list of mdp4_irq */
-	uint32_t vblank_mask;         /* irq bits set for userspace vblank */
+	/* irq number to be passed on to drm_irq_install */
+	int irq;
 };
 
 static inline void msm_kms_init(struct msm_kms *kms,
@@ -75,5 +73,7 @@ static inline void msm_kms_init(struct msm_kms *kms,
 
 struct msm_kms *mdp4_kms_init(struct drm_device *dev);
 struct msm_kms *mdp5_kms_init(struct drm_device *dev);
+int msm_mdss_init(struct drm_device *dev);
+void msm_mdss_destroy(struct drm_device *dev);
 
 #endif /* __MSM_KMS_H__ */
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 830857c47c86..17fe4e53e0d1 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -132,7 +132,7 @@ static ssize_t perf_read(struct file *file, char __user *buf,
 		size_t sz, loff_t *ppos)
 {
 	struct msm_perf_state *perf = file->private_data;
-	int n = 0, ret;
+	int n = 0, ret = 0;
 
 	mutex_lock(&perf->read_lock);
 
@@ -143,9 +143,10 @@ static ssize_t perf_read(struct file *file, char __user *buf,
 	}
 
 	n = min((int)sz, perf->buftot - perf->bufpos);
-	ret = copy_to_user(buf, &perf->buf[perf->bufpos], n);
-	if (ret)
+	if (copy_to_user(buf, &perf->buf[perf->bufpos], n)) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	perf->bufpos += n;
 	*ppos += n;
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index b48f73ac6389..3a5fdfcd67ae 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -27,6 +27,11 @@
  * This bypasses drm_debugfs_create_files() mainly because we need to use
  * our own fops for a bit more control.  In particular, we don't want to
  * do anything if userspace doesn't have the debugfs file open.
+ *
+ * The module-param "rd_full", which defaults to false, enables snapshotting
+ * all (non-written) buffers in the submit, rather than just cmdstream bo's.
+ * This is useful to capture the contents of (for example) vbo's or textures,
+ * or shader programs (if not emitted inline in cmdstream).
  */
 
 #ifdef CONFIG_DEBUG_FS
@@ -40,6 +45,10 @@
 #include "msm_gpu.h"
 #include "msm_gem.h"
 
+static bool rd_full = false;
+MODULE_PARM_DESC(rd_full, "If true, $debugfs/.../rd will snapshot all buffer contents");
+module_param_named(rd_full, rd_full, bool, 0600);
+
 enum rd_sect_type {
 	RD_NONE,
 	RD_TEST,       /* ascii text */
@@ -140,9 +149,10 @@ static ssize_t rd_read(struct file *file, char __user *buf,
 		goto out;
 
 	n = min_t(int, sz, circ_count_to_end(&rd->fifo));
-	ret = copy_to_user(buf, fptr, n);
-	if (ret)
+	if (copy_to_user(buf, fptr, n)) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	fifo->tail = (fifo->tail + n) & (BUF_SZ - 1);
 	*ppos += n;
@@ -277,6 +287,31 @@ void msm_rd_debugfs_cleanup(struct drm_minor *minor)
 	kfree(rd);
 }
 
+static void snapshot_buf(struct msm_rd_state *rd,
+		struct msm_gem_submit *submit, int idx,
+		uint32_t iova, uint32_t size)
+{
+	struct msm_gem_object *obj = submit->bos[idx].obj;
+	const char *buf;
+
+	buf = msm_gem_get_vaddr_locked(&obj->base);
+	if (IS_ERR(buf))
+		return;
+
+	if (iova) {
+		buf += iova - submit->bos[idx].iova;
+	} else {
+		iova = submit->bos[idx].iova;
+		size = obj->base.size;
+	}
+
+	rd_write_section(rd, RD_GPUADDR,
+			(uint32_t[2]){ iova, size }, 8);
+	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
+
+	msm_gem_put_vaddr_locked(&obj->base);
+}
+
 /* called under struct_mutex */
 void msm_rd_dump_submit(struct msm_gem_submit *submit)
 {
@@ -300,24 +335,27 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 
 	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-	/* could be nice to have an option (module-param?) to snapshot
-	 * all the bo's associated with the submit.  Handy to see vtx
-	 * buffers, etc.  For now just the cmdstream bo's is enough.
-	 */
+	if (rd_full) {
+		for (i = 0; i < submit->nr_bos; i++) {
+			/* buffers that are written to probably don't start out
+			 * with anything interesting:
+			 */
+			if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
+				continue;
+
+			snapshot_buf(rd, submit, i, 0, 0);
+		}
+	}
 
 	for (i = 0; i < submit->nr_cmds; i++) {
-		uint32_t idx  = submit->cmd[i].idx;
 		uint32_t iova = submit->cmd[i].iova;
 		uint32_t szd  = submit->cmd[i].size; /* in dwords */
-		struct msm_gem_object *obj = submit->bos[idx].obj;
-		const char *buf = msm_gem_vaddr_locked(&obj->base);
 
-		buf += iova - submit->bos[idx].iova;
-
-		rd_write_section(rd, RD_GPUADDR,
-				(uint32_t[2]){ iova, szd * 4 }, 8);
-		rd_write_section(rd, RD_BUFFER_CONTENTS,
-				buf, szd * 4);
+		/* snapshot cmdstream bo's (if we haven't already): */
+		if (!rd_full) {
+			snapshot_buf(rd, submit, submit->cmd[i].idx,
+					submit->cmd[i].iova, szd * 4);
+		}
 
 		switch (submit->cmd[i].type) {
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 1f14b908b221..f326cf6a32e6 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -39,7 +39,11 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
 		goto fail;
 	}
 
-	ring->start = msm_gem_vaddr_locked(ring->bo);
+	ring->start = msm_gem_get_vaddr_locked(ring->bo);
+	if (IS_ERR(ring->start)) {
+		ret = PTR_ERR(ring->start);
+		goto fail;
+	}
 	ring->end   = ring->start + (size / 4);
 	ring->cur   = ring->start;
 
@@ -55,7 +59,9 @@ fail:
 
 void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
 {
-	if (ring->bo)
+	if (ring->bo) {
+		msm_gem_put_vaddr(ring->bo);
 		drm_gem_object_unreference_unlocked(ring->bo);
+	}
 	kfree(ring);
 }
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 5ab13e7939db..2922a82cba8e 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -3,13 +3,7 @@ config DRM_NOUVEAU
 	depends on DRM && PCI
         select FW_LOADER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
-	select FB
-	select FRAMEBUFFER_CONSOLE if !EXPERT
 	select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT
 	select ACPI_VIDEO if ACPI && X86 && BACKLIGHT_CLASS_DEVICE && INPUT
 	select X86_PLATFORM_DEVICES if ACPI && X86
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index aea81a547e85..34c0f2f67548 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -125,18 +125,8 @@ nv04_display_destroy(struct drm_device *dev)
 	struct nv04_display *disp = nv04_display(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_encoder *encoder;
-	struct drm_crtc *crtc;
 	struct nouveau_crtc *nv_crtc;
 
-	/* Turn every CRTC off. */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct drm_mode_set modeset = {
-			.crtc = crtc,
-		};
-
-		drm_mode_set_config_internal(&modeset);
-	}
-
 	/* Restore state */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head)
 		encoder->enc_restore(&encoder->base.base);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index a665b78b2af5..434d1e29f279 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -749,13 +749,8 @@ static int nv17_tv_set_property(struct drm_encoder *encoder,
 
 		/* Disable the crtc to ensure a full modeset is
 		 * performed whenever it's turned on again. */
-		if (crtc) {
-			struct drm_mode_set modeset = {
-				.crtc = crtc,
-			};
-
-			drm_mode_set_config_internal(&modeset);
-		}
+		if (crtc)
+			drm_crtc_force_disable(crtc);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 331620a52afa..287a7d6fa480 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -29,6 +29,7 @@ struct nv_device_info_v0 {
 #define NV_DEVICE_INFO_V0_FERMI                                            0x07
 #define NV_DEVICE_INFO_V0_KEPLER                                           0x08
 #define NV_DEVICE_INFO_V0_MAXWELL                                          0x09
+#define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 982aad8fa645..e6e9537537cf 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -39,6 +39,7 @@
 #define KEPLER_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000a06f
 #define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
+#define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
@@ -50,6 +51,8 @@
 #define GK110_DISP                                    /* cl5070.h */ 0x00009270
 #define GM107_DISP                                    /* cl5070.h */ 0x00009470
 #define GM200_DISP                                    /* cl5070.h */ 0x00009570
+#define GP100_DISP                                    /* cl5070.h */ 0x00009770
+#define GP104_DISP                                    /* cl5070.h */ 0x00009870
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -86,6 +89,8 @@
 #define GK110_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000927d
 #define GM107_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000947d
 #define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
+#define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
+#define GP104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -105,6 +110,8 @@
 #define MAXWELL_A                                     /* cl9097.h */ 0x0000b097
 #define MAXWELL_B                                     /* cl9097.h */ 0x0000b197
 
+#define PASCAL_A                                      /* cl9097.h */ 0x0000c097
+
 #define NV74_BSP                                                     0x000074b0
 
 #define GT212_MSVLD                                                  0x000085b1
@@ -128,6 +135,8 @@
 #define FERMI_DMA                                                    0x000090b5
 #define KEPLER_DMA_COPY_A                                            0x0000a0b5
 #define MAXWELL_DMA_COPY_A                                           0x0000b0b5
+#define PASCAL_DMA_COPY_A                                            0x0000c0b5
+#define PASCAL_DMA_COPY_B                                            0x0000c1b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
@@ -137,6 +146,7 @@
 #define KEPLER_COMPUTE_B                                             0x0000a1c0
 #define MAXWELL_COMPUTE_A                                            0x0000b0c0
 #define MAXWELL_COMPUTE_B                                            0x0000b1c0
+#define PASCAL_COMPUTE_A                                             0x0000c0c0
 
 #define NV74_CIPHER                                                  0x000074c1
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index c612dc1f1eb4..7ea8aa7ca408 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -16,9 +16,9 @@ enum nvkm_devidx {
 	NVKM_SUBDEV_MC,
 	NVKM_SUBDEV_BUS,
 	NVKM_SUBDEV_TIMER,
+	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_FB,
 	NVKM_SUBDEV_LTC,
-	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_MMU,
 	NVKM_SUBDEV_BAR,
 	NVKM_SUBDEV_PMU,
@@ -33,7 +33,10 @@ enum nvkm_devidx {
 	NVKM_ENGINE_CE0,
 	NVKM_ENGINE_CE1,
 	NVKM_ENGINE_CE2,
-	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE2,
+	NVKM_ENGINE_CE3,
+	NVKM_ENGINE_CE4,
+	NVKM_ENGINE_CE5,
+	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE5,
 
 	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_DISP,
@@ -50,7 +53,8 @@ enum nvkm_devidx {
 
 	NVKM_ENGINE_NVENC0,
 	NVKM_ENGINE_NVENC1,
-	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC1,
+	NVKM_ENGINE_NVENC2,
+	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC2,
 
 	NVKM_ENGINE_NVDEC,
 	NVKM_ENGINE_PM,
@@ -102,6 +106,7 @@ struct nvkm_device {
 		NV_C0    = 0xc0,
 		NV_E0    = 0xe0,
 		GM100    = 0x110,
+		GP100    = 0x130,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
@@ -136,7 +141,7 @@ struct nvkm_device {
 	struct nvkm_volt *volt;
 
 	struct nvkm_engine *bsp;
-	struct nvkm_engine *ce[3];
+	struct nvkm_engine *ce[6];
 	struct nvkm_engine *cipher;
 	struct nvkm_disp *disp;
 	struct nvkm_dma *dma;
@@ -149,7 +154,7 @@ struct nvkm_device {
 	struct nvkm_engine *mspdec;
 	struct nvkm_engine *msppp;
 	struct nvkm_engine *msvld;
-	struct nvkm_engine *nvenc[2];
+	struct nvkm_engine *nvenc[3];
 	struct nvkm_engine *nvdec;
 	struct nvkm_pm *pm;
 	struct nvkm_engine *sec;
@@ -170,7 +175,6 @@ struct nvkm_device_func {
 	void (*fini)(struct nvkm_device *, bool suspend);
 	resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
 	resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
-	bool cpu_coherent;
 };
 
 struct nvkm_device_quirk {
@@ -206,7 +210,7 @@ struct nvkm_device_chip {
 	int (*volt    )(struct nvkm_device *, int idx, struct nvkm_volt **);
 
 	int (*bsp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*ce[3]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*ce[6]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*cipher  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*disp    )(struct nvkm_device *, int idx, struct nvkm_disp **);
 	int (*dma     )(struct nvkm_device *, int idx, struct nvkm_dma **);
@@ -219,7 +223,7 @@ struct nvkm_device_chip {
 	int (*mspdec  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msppp   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msvld   )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*nvenc[2])(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*nvenc[3])(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*nvdec   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*pm      )(struct nvkm_device *, int idx, struct nvkm_pm **);
 	int (*sec     )(struct nvkm_device *, int idx, struct nvkm_engine **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index b5370cb56e3c..e5c9b6268dcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -28,6 +28,7 @@ struct nvkm_device_tegra {
 	} iommu;
 
 	int gpu_speedo;
+	int gpu_speedo_id;
 };
 
 struct nvkm_device_tegra_func {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index 594d719ba41e..d3d26a1e215d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -7,4 +7,6 @@ int gf100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gk104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index d4fdce27b297..e82049667ce4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -32,4 +32,6 @@ int gk104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gk110_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 15ddfcf5e8db..ed92fec5292c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -66,4 +66,5 @@ int gk20a_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm107_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm200_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index 6515f5810a26..89cf99307828 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -42,4 +42,5 @@ int gk20a_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm107_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
index e39a1fea930b..a72f3290528a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
@@ -7,6 +7,9 @@ struct nvkm_bios {
 	u32 size;
 	u8 *data;
 
+	u32 image0_size;
+	u32 imaged_addr;
+
 	u32 bmp_offset;
 	u32 bit_offset;
 
@@ -22,10 +25,9 @@ struct nvkm_bios {
 u8  nvbios_checksum(const u8 *data, int size);
 u16 nvbios_findstr(const u8 *data, int size, const char *str, int len);
 int nvbios_memcmp(struct nvkm_bios *, u32 addr, const char *, u32 len);
-
-#define nvbios_rd08(b,o) (b)->data[(o)]
-#define nvbios_rd16(b,o) get_unaligned_le16(&(b)->data[(o)])
-#define nvbios_rd32(b,o) get_unaligned_le32(&(b)->data[(o)])
+u8  nvbios_rd08(struct nvkm_bios *, u32 addr);
+u16 nvbios_rd16(struct nvkm_bios *, u32 addr);
+u32 nvbios_rd32(struct nvkm_bios *, u32 addr);
 
 int nvkm_bios_new(struct nvkm_device *, int, struct nvkm_bios **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
index db10c11f0595..c5a6ebd5a478 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
@@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *);
 
 struct nvbios_ocfg {
-	u16 match;
+	u8  proto;
+	u8  flags;
 	u16 clkcmp[2];
 };
 
@@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
 u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
-u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type,
+u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
 u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 0a734fd06acf..3a410275fa71 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -56,6 +56,8 @@ struct nvkm_fb {
 		int regions;
 	} tile;
 
+	u8 page;
+
 	struct nvkm_memory *mmu_rd;
 	struct nvkm_memory *mmu_wr;
 };
@@ -91,6 +93,8 @@ int gk104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gk20a_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm107_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm200_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index c6b90b6543b3..cd755baf9cab 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -38,4 +38,5 @@ int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 2e80682b2da1..27d25b18d85c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -7,11 +7,14 @@ struct nvkm_mc {
 	struct nvkm_subdev subdev;
 };
 
-void nvkm_mc_intr(struct nvkm_mc *, bool *handled);
-void nvkm_mc_intr_unarm(struct nvkm_mc *);
-void nvkm_mc_intr_rearm(struct nvkm_mc *);
-void nvkm_mc_reset(struct nvkm_mc *, enum nvkm_devidx);
-void nvkm_mc_unk260(struct nvkm_mc *, u32 data);
+void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_intr(struct nvkm_device *, bool *handled);
+void nvkm_mc_intr_unarm(struct nvkm_device *);
+void nvkm_mc_intr_rearm(struct nvkm_device *);
+void nvkm_mc_intr_mask(struct nvkm_device *, enum nvkm_devidx, bool enable);
+void nvkm_mc_unk260(struct nvkm_device *, u32 data);
 
 int nv04_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int nv11_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
@@ -24,4 +27,5 @@ int gt215_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gf100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
+int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index ddb913889d7e..e6523e2cea9f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -47,6 +47,7 @@ int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gk104_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int gp100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 
 /* pcie functions */
 int nvkm_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8 width);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index c6edd95a5b69..b04c38c07761 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -43,9 +43,8 @@ struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
 	struct nvkm_subdev subdev;
 
+	enum nvkm_devidx devidx;
 	u32 base;
-	u32 irq_mask;
-	u32 enable_mask;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 8fb575a92c48..71ebbfd4484f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -8,10 +8,11 @@ struct nvkm_top {
 	struct list_head device;
 };
 
-u32 nvkm_top_reset(struct nvkm_top *, enum nvkm_devidx);
-u32 nvkm_top_intr(struct nvkm_top *, u32 intr, u64 *subdevs);
-enum nvkm_devidx nvkm_top_fault(struct nvkm_top *, int fault);
-enum nvkm_devidx nvkm_top_engine(struct nvkm_top *, int, int *runl, int *engn);
+u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
+u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
+u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
+enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
 int gk104_top_new(struct nvkm_device *, int, struct nvkm_top **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index feff55cff05b..b765f4ffcde6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -12,6 +12,9 @@ struct nvkm_volt {
 		u32 uv;
 		u8 vid;
 	} vid[256];
+
+	u32 max_uv;
+	u32 min_uv;
 };
 
 int nvkm_volt_get(struct nvkm_volt *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index eb7de487a2b3..7bd4683216d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -100,6 +100,7 @@ nouveau_abi16_swclass(struct nouveau_drm *drm)
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		return NVIF_CLASS_SW_GF100;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5e3f3e826476..528bdeffb339 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -209,8 +209,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
-		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+	nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
@@ -424,13 +423,7 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	/*
-	 * TTM buffers allocated using the DMA API already have a mapping, let's
-	 * use it instead.
-	 */
-	if (!nvbo->force_coherent)
-		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
-				  &nvbo->kmap);
+	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
 
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
@@ -442,12 +435,7 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
 	if (!nvbo)
 		return;
 
-	/*
-	 * TTM buffers allocated using the DMA API already had a coherent
-	 * mapping which we used, no need to unmap.
-	 */
-	if (!nvbo->force_coherent)
-		ttm_bo_kunmap(&nvbo->kmap);
+	ttm_bo_kunmap(&nvbo->kmap);
 }
 
 void
@@ -506,35 +494,13 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
-static inline void *
-_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
-{
-	struct ttm_dma_tt *dma_tt;
-	u8 *m = mem;
-
-	index *= sz;
-
-	if (m) {
-		/* kmap'd address, return the corresponding offset */
-		m += index;
-	} else {
-		/* DMA-API mapping, lookup the right address */
-		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
-		m = dma_tt->cpu_address[index / PAGE_SIZE];
-		m += index % PAGE_SIZE;
-	}
-
-	return m;
-}
-#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
-
 void
 nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
@@ -548,7 +514,7 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
@@ -562,7 +528,7 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
@@ -1082,7 +1048,6 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								no_wait_gpu,
 								new_mem);
 				nouveau_fence_unref(&fence);
 			}
@@ -1104,6 +1069,10 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
+		{  "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xb0b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xb0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
@@ -1289,6 +1258,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
+	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
@@ -1324,7 +1297,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, evict, intr, no_wait_gpu, new_mem);
 
 out:
 	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
@@ -1488,14 +1461,6 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached)
-		return ttm_dma_populate(ttm_dma, dev->dev);
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		return ttm_agp_tt_populate(ttm);
@@ -1553,16 +1518,6 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached) {
-		ttm_dma_unpopulate(ttm_dma, dev->dev);
-		return;
-	}
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index b1d2527c5625..f9b3c811187e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -191,7 +191,8 @@ static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		    u32 engine, struct nouveau_channel **pchan)
 {
-	static const u16 oclasses[] = { MAXWELL_CHANNEL_GPFIFO_A,
+	static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A,
+					MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_B,
 					KEPLER_CHANNEL_GPFIFO_A,
 					FERMI_CHANNEL_GPFIFO,
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6072fe292db8..afbf557b23d4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -47,7 +47,7 @@ nouveau_display_vblank_handler(struct nvif_notify *notify)
 {
 	struct nouveau_crtc *nv_crtc =
 		container_of(notify, typeof(*nv_crtc), vblank);
-	drm_handle_vblank(nv_crtc->base.dev, nv_crtc->index);
+	drm_crtc_handle_vblank(&nv_crtc->base);
 	return NVIF_NOTIFY_KEEP;
 }
 
@@ -495,6 +495,8 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
 		static const u16 oclass[] = {
+			GP104_DISP,
+			GP100_DISP,
 			GM200_DISP,
 			GM107_DISP,
 			GK110_DISP,
@@ -554,6 +556,7 @@ nouveau_display_destroy(struct drm_device *dev)
 	nouveau_display_vblank_fini(dev);
 
 	drm_kms_helper_poll_fini(dev);
+	drm_crtc_force_disable_all(dev);
 	drm_mode_config_cleanup(dev);
 
 	if (disp->dtor)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 295e7621cc68..66c1280c0f1f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -198,6 +198,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		case KEPLER_CHANNEL_GPFIFO_A:
 		case KEPLER_CHANNEL_GPFIFO_B:
 		case MAXWELL_CHANNEL_GPFIFO_A:
+		case PASCAL_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
 		default:
@@ -316,7 +317,16 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	if (vga_switcheroo_client_probe_defer(pdev))
 		return -EPROBE_DEFER;
 
-	/* remove conflicting drivers (vesafb, efifb etc) */
+	/* We need to check that the chipset is supported before booting
+	 * fbdev off the hardware, as there's no way to put it back.
+	 */
+	ret = nvkm_device_pci_new(pdev, NULL, "error", true, false, 0, &device);
+	if (ret)
+		return ret;
+
+	nvkm_device_del(&device);
+
+	/* Remove conflicting drivers (vesafb, efifb etc). */
 	aper = alloc_apertures(3);
 	if (!aper)
 		return -ENOMEM;
@@ -430,6 +440,11 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_vga_init(drm);
 
 	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->device)->mmu) {
+			ret = -ENOSYS;
+			goto fail_device;
+		}
+
 		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
 				  0x1000, NULL, &drm->client.vm);
 		if (ret)
@@ -490,7 +505,11 @@ nouveau_drm_unload(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	pm_runtime_get_sync(dev->dev);
+	if (nouveau_runtime_pm != 0) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
+
 	nouveau_fbcon_fini(dev);
 	nouveau_accel_fini(drm);
 	nouveau_hwmon_fini(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 57aaf98a26f9..d1f248fd3506 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -552,6 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
+	if (fbcon->helper.fbdev)
+		fbcon->helper.fbdev->pixmap.buf_align = 4;
 	return 0;
 
 fini:
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 1ff4166af26e..71f764bf4cc6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -535,6 +535,40 @@ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO,
 			  nouveau_hwmon_get_in0_input, NULL, 0);
 
 static ssize_t
+nouveau_hwmon_get_in0_min(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->min_uv)
+		return -ENODEV;
+
+	return sprintf(buf, "%i\n", volt->min_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_min, S_IRUGO,
+			  nouveau_hwmon_get_in0_min, NULL, 0);
+
+static ssize_t
+nouveau_hwmon_get_in0_max(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->max_uv)
+		return -ENODEV;
+
+	return sprintf(buf, "%i\n", volt->max_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_max, S_IRUGO,
+			  nouveau_hwmon_get_in0_max, NULL, 0);
+
+static ssize_t
 nouveau_hwmon_get_in0_label(struct device *d,
 			    struct device_attribute *a, char *buf)
 {
@@ -594,6 +628,8 @@ static struct attribute *hwmon_pwm_fan_attributes[] = {
 
 static struct attribute *hwmon_in0_attributes[] = {
 	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in0_min.dev_attr.attr,
+	&sensor_dev_attr_in0_max.dev_attr.attr,
 	&sensor_dev_attr_in0_label.dev_attr.attr,
 	NULL
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index bcee91497eb9..1825dbc33192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -164,6 +164,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		node->memtype = (nvbo->tile_flags & 0xff00) >> 8;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 0f3e4bb411cc..7d9248b8c664 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	uint32_t fg;
 	uint32_t bg;
 	uint32_t dsize;
-	uint32_t width;
 	uint32_t *data = (uint32_t *)image->data;
 	int ret;
 
@@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 8);
-	dsize = ALIGN(width * image->height, 32) >> 5;
-
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
 		fg = ((uint32_t *) info->pseudo_palette)[image->fg_color];
@@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 			 ((image->dx + image->width) & 0xffff));
 	OUT_RING(chan, bg);
 	OUT_RING(chan, fg);
-	OUT_RING(chan, (image->height << 16) | width);
+	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
 
+	dsize = ALIGN(image->width * image->height, 32) >> 5;
 	while (dsize) {
 		int iter_len = dsize > 128 ? 128 : dsize;
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7a7788212df7..7d0edcbcfca7 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -297,6 +297,8 @@ nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
 		.pushbuf = 0xb0007d00,
 	};
 	static const s32 oclass[] = {
+		GP104_DISP_CORE_CHANNEL_DMA,
+		GP100_DISP_CORE_CHANNEL_DMA,
 		GM200_DISP_CORE_CHANNEL_DMA,
 		GM107_DISP_CORE_CHANNEL_DMA,
 		GK110_DISP_CORE_CHANNEL_DMA,
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 33d9ee0fac40..1aeb698e9707 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NV04(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING(chan, 0);
 	OUT_RING(chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
index a0913359ac05..839f4c8c1805 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
@@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NVC0(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index b18557858f19..19044aba265e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -57,6 +57,9 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_CE0     ] = "ce0",
 	[NVKM_ENGINE_CE1     ] = "ce1",
 	[NVKM_ENGINE_CE2     ] = "ce2",
+	[NVKM_ENGINE_CE3     ] = "ce3",
+	[NVKM_ENGINE_CE4     ] = "ce4",
+	[NVKM_ENGINE_CE5     ] = "ce5",
 	[NVKM_ENGINE_CIPHER  ] = "cipher",
 	[NVKM_ENGINE_DISP    ] = "disp",
 	[NVKM_ENGINE_DMAOBJ  ] = "dma",
@@ -71,6 +74,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_MSVLD   ] = "msvld",
 	[NVKM_ENGINE_NVENC0  ] = "nvenc0",
 	[NVKM_ENGINE_NVENC1  ] = "nvenc1",
+	[NVKM_ENGINE_NVENC2  ] = "nvenc2",
 	[NVKM_ENGINE_NVDEC   ] = "nvdec",
 	[NVKM_ENGINE_PM      ] = "pm",
 	[NVKM_ENGINE_SEC     ] = "sec",
@@ -105,7 +109,7 @@ nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 		}
 	}
 
-	nvkm_mc_reset(device->mc, subdev->index);
+	nvkm_mc_reset(device, subdev->index);
 
 	time = ktime_to_us(ktime_get()) - time;
 	nvkm_trace(subdev, "%s completed in %lldus\n", action, time);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index 9c19d59b47df..a4458a8eb30a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -3,3 +3,5 @@ nvkm-y += nvkm/engine/ce/gf100.o
 nvkm-y += nvkm/engine/ce/gk104.o
 nvkm-y += nvkm/engine/ce/gm107.o
 nvkm-y += nvkm/engine/ce/gm200.o
+nvkm-y += nvkm/engine/ce/gp100.o
+nvkm-y += nvkm/engine/ce/gp104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
new file mode 100644
index 000000000000..c7710456bc30
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_enum
+gp100_ce_launcherr_report[] = {
+	{ 0x0, "NO_ERR" },
+	{ 0x1, "2D_LAYER_EXCEEDS_DEPTH" },
+	{ 0x2, "INVALID_ALIGNMENT" },
+	{ 0x3, "MEM2MEM_RECT_OUT_OF_BOUNDS" },
+	{ 0x4, "SRC_LINE_EXCEEDS_PITCH" },
+	{ 0x5, "SRC_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x6, "DST_LINE_EXCEEDS_PITCH" },
+	{ 0x7, "DST_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x8, "BAD_SRC_PIXEL_COMP_REF" },
+	{ 0x9, "INVALID_VALUE" },
+	{ 0xa, "UNUSED_FIELD" },
+	{ 0xb, "INVALID_OPERATION" },
+	{ 0xc, "NO_RESOURCES" },
+	{ 0xd, "INVALID_CONFIG" },
+	{}
+};
+
+static void
+gp100_ce_intr_launcherr(struct nvkm_engine *ce, const u32 base)
+{
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x104418 + base);
+	const struct nvkm_enum *en =
+		nvkm_enum_find(gp100_ce_launcherr_report, stat & 0x0000000f);
+	nvkm_warn(subdev, "LAUNCHERR %08x [%s]\n", stat, en ? en->name : "");
+}
+
+void
+gp100_ce_intr(struct nvkm_engine *ce)
+{
+	const u32 base = (ce->subdev.index - NVKM_ENGINE_CE0) * 0x80;
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mask = nvkm_rd32(device, 0x10440c + base);
+	u32 intr = nvkm_rd32(device, 0x104410 + base) & mask;
+	if (intr & 0x00000001) { //XXX: guess
+		nvkm_warn(subdev, "BLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000001);
+		intr &= ~0x00000001;
+	}
+	if (intr & 0x00000002) { //XXX: guess
+		nvkm_warn(subdev, "NONBLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000002);
+		intr &= ~0x00000002;
+	}
+	if (intr & 0x00000004) {
+		gp100_ce_intr_launcherr(ce, base);
+		nvkm_wr32(device, 0x104410 + base, 0x00000004);
+		intr &= ~0x00000004;
+	}
+	if (intr) {
+		nvkm_warn(subdev, "intr %08x\n", intr);
+		nvkm_wr32(device, 0x104410 + base, intr);
+	}
+}
+
+static const struct nvkm_engine_func
+gp100_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gp100_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp100_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
new file mode 100644
index 000000000000..20e019788a53
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_engine_func
+gp104_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_B },
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gp104_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp104_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
index e2fa8b161943..2dce405976ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
@@ -4,4 +4,5 @@
 
 void gt215_ce_intr(struct nvkm_falcon *, struct nvkm_fifo_chan *);
 void gk104_ce_intr(struct nvkm_engine *);
+void gp100_ce_intr(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 4572debcb0c9..7218a067a6c5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2148,6 +2148,67 @@ nv12b_chipset = {
 	.sw = gf100_sw_new,
 };
 
+static const struct nvkm_device_chip
+nv130_chipset = {
+	.name = "GP100",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.secboot = gm200_secboot_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp100_ce_new,
+	.ce[1] = gp100_ce_new,
+	.ce[2] = gp100_ce_new,
+	.ce[3] = gp100_ce_new,
+	.ce[4] = gp100_ce_new,
+	.ce[5] = gp100_ce_new,
+	.dma = gf119_dma_new,
+	.disp = gp100_disp_new,
+	.fifo = gp100_fifo_new,
+	.gr = gp100_gr_new,
+	.sw = gf100_sw_new,
+};
+
+static const struct nvkm_device_chip
+nv134_chipset = {
+	.name = "GP104",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp104_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp104_ce_new,
+	.ce[1] = gp104_ce_new,
+	.ce[2] = gp104_ce_new,
+	.ce[3] = gp104_ce_new,
+	.disp = gp104_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2221,6 +2282,9 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(CE0    , device->ce[0]   ,  device->ce[0]);
 	_(CE1    , device->ce[1]   ,  device->ce[1]);
 	_(CE2    , device->ce[2]   ,  device->ce[2]);
+	_(CE3    , device->ce[3]   ,  device->ce[3]);
+	_(CE4    , device->ce[4]   ,  device->ce[4]);
+	_(CE5    , device->ce[5]   ,  device->ce[5]);
 	_(CIPHER , device->cipher  ,  device->cipher);
 	_(DISP   , device->disp    , &device->disp->engine);
 	_(DMAOBJ , device->dma     , &device->dma->engine);
@@ -2235,6 +2299,7 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(MSVLD  , device->msvld   ,  device->msvld);
 	_(NVENC0 , device->nvenc[0],  device->nvenc[0]);
 	_(NVENC1 , device->nvenc[1],  device->nvenc[1]);
+	_(NVENC2 , device->nvenc[2],  device->nvenc[2]);
 	_(NVDEC  , device->nvdec   ,  device->nvdec);
 	_(PM     , device->pm      , &device->pm->engine);
 	_(SEC    , device->sec     ,  device->sec);
@@ -2492,6 +2557,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 			case 0x100: device->card_type = NV_E0; break;
 			case 0x110:
 			case 0x120: device->card_type = GM100; break;
+			case 0x130: device->card_type = GP100; break;
 			default:
 				break;
 			}
@@ -2576,6 +2642,8 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x124: device->chip = &nv124_chipset; break;
 		case 0x126: device->chip = &nv126_chipset; break;
 		case 0x12b: device->chip = &nv12b_chipset; break;
+		case 0x130: device->chip = &nv130_chipset; break;
+		case 0x134: device->chip = &nv134_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
@@ -2659,6 +2727,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_CE0     ,    ce[0]);
 		_(NVKM_ENGINE_CE1     ,    ce[1]);
 		_(NVKM_ENGINE_CE2     ,    ce[2]);
+		_(NVKM_ENGINE_CE3     ,    ce[3]);
+		_(NVKM_ENGINE_CE4     ,    ce[4]);
+		_(NVKM_ENGINE_CE5     ,    ce[5]);
 		_(NVKM_ENGINE_CIPHER  ,   cipher);
 		_(NVKM_ENGINE_DISP    ,     disp);
 		_(NVKM_ENGINE_DMAOBJ  ,      dma);
@@ -2673,6 +2744,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_MSVLD   ,    msvld);
 		_(NVKM_ENGINE_NVENC0  , nvenc[0]);
 		_(NVKM_ENGINE_NVENC1  , nvenc[1]);
+		_(NVKM_ENGINE_NVENC2  , nvenc[2]);
 		_(NVKM_ENGINE_NVDEC   ,    nvdec);
 		_(NVKM_ENGINE_PM      ,       pm);
 		_(NVKM_ENGINE_SEC     ,      sec);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 18fab3973ce5..b1b693219db3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,7 +1614,6 @@ nvkm_device_pci_func = {
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
-	.cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index ec12efb4689a..939682f18788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -191,13 +191,11 @@ static irqreturn_t
 nvkm_device_tegra_intr(int irq, void *arg)
 {
 	struct nvkm_device_tegra *tdev = arg;
-	struct nvkm_mc *mc = tdev->device.mc;
+	struct nvkm_device *device = &tdev->device;
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	nvkm_mc_intr(device, &handled);
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -247,7 +245,6 @@ nvkm_device_tegra_func = {
 	.fini = nvkm_device_tegra_fini,
 	.resource_addr = nvkm_device_tegra_resource_addr,
 	.resource_size = nvkm_device_tegra_resource_size,
-	.cpu_coherent = false,
 };
 
 int
@@ -313,6 +310,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 		goto remove;
 
 	tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
+	tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id;
 	ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
 			       NVKM_DEVICE_TEGRA, pdev->id, NULL,
 			       cfg, dbg, detect, mmio, subdev_mask,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 137066426ed7..79a8f71cf788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -102,6 +102,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 	case NV_C0: args->v0.family = NV_DEVICE_INFO_V0_FERMI; break;
 	case NV_E0: args->v0.family = NV_DEVICE_INFO_V0_KEPLER; break;
 	case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break;
+	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
 	default:
 		args->v0.family = 0;
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index a74c5dd27dc0..77a52b54a31e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -10,6 +10,8 @@ nvkm-y += nvkm/engine/disp/gk104.o
 nvkm-y += nvkm/engine/disp/gk110.o
 nvkm-y += nvkm/engine/disp/gm107.o
 nvkm-y += nvkm/engine/disp/gm200.o
+nvkm-y += nvkm/engine/disp/gp100.o
+nvkm-y += nvkm/engine/disp/gp104.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/outpdp.o
@@ -18,6 +20,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o
 nvkm-y += nvkm/engine/disp/sornv50.o
 nvkm-y += nvkm/engine/disp/sorg94.o
 nvkm-y += nvkm/engine/disp/sorgf119.o
+nvkm-y += nvkm/engine/disp/sorgm107.o
 nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/dport.o
 
@@ -44,12 +47,15 @@ nvkm-y += nvkm/engine/disp/rootgk104.o
 nvkm-y += nvkm/engine/disp/rootgk110.o
 nvkm-y += nvkm/engine/disp/rootgm107.o
 nvkm-y += nvkm/engine/disp/rootgm200.o
+nvkm-y += nvkm/engine/disp/rootgp100.o
+nvkm-y += nvkm/engine/disp/rootgp104.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
 
 nvkm-y += nvkm/engine/disp/dmacnv50.o
 nvkm-y += nvkm/engine/disp/dmacgf119.o
+nvkm-y += nvkm/engine/disp/dmacgp104.o
 
 nvkm-y += nvkm/engine/disp/basenv50.o
 nvkm-y += nvkm/engine/disp/baseg84.o
@@ -58,6 +64,7 @@ nvkm-y += nvkm/engine/disp/basegt215.o
 nvkm-y += nvkm/engine/disp/basegf119.o
 nvkm-y += nvkm/engine/disp/basegk104.o
 nvkm-y += nvkm/engine/disp/basegk110.o
+nvkm-y += nvkm/engine/disp/basegp104.o
 
 nvkm-y += nvkm/engine/disp/corenv50.o
 nvkm-y += nvkm/engine/disp/coreg84.o
@@ -69,6 +76,8 @@ nvkm-y += nvkm/engine/disp/coregk104.o
 nvkm-y += nvkm/engine/disp/coregk110.o
 nvkm-y += nvkm/engine/disp/coregm107.o
 nvkm-y += nvkm/engine/disp/coregm200.o
+nvkm-y += nvkm/engine/disp/coregp100.o
+nvkm-y += nvkm/engine/disp/coregp104.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
@@ -76,6 +85,7 @@ nvkm-y += nvkm/engine/disp/ovlygt200.o
 nvkm-y += nvkm/engine/disp/ovlygt215.o
 nvkm-y += nvkm/engine/disp/ovlygf119.o
 nvkm-y += nvkm/engine/disp/ovlygk104.o
+nvkm-y += nvkm/engine/disp/ovlygp104.o
 
 nvkm-y += nvkm/engine/disp/piocnv50.o
 nvkm-y += nvkm/engine/disp/piocgf119.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
new file mode 100644
index 000000000000..51688e37c54e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_base_oclass = {
+	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_base_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gf119_disp_base_chan_mthd,
+	.chid = 1,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
index aee374884c96..f5f683d9fd20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
@@ -85,6 +85,7 @@ extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_pior;
 extern const struct nv50_disp_chan_mthd gf119_disp_base_chan_mthd;
 
 extern const struct nv50_disp_chan_mthd gk104_disp_core_chan_mthd;
+extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd;
 
 struct nv50_disp_pioc_oclass {
 	int (*ctor)(const struct nv50_disp_chan_func *,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
index 6b1dc703dac7..21fbf89b6319 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
@@ -171,7 +171,7 @@ gf119_disp_core_chan_mthd = {
 	}
 };
 
-static void
+void
 gf119_disp_core_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
new file mode 100644
index 000000000000..d5dff6619d4d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp100_disp_core_oclass = {
+	.base.oclass = GP100_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gf119_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.chid = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
new file mode 100644
index 000000000000..6922f4007b61
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+#include <nvif/class.h>
+
+static int
+gp104_disp_core_init(struct nv50_disp_dmac *chan)
+{
+	struct nv50_disp *disp = chan->base.root->disp;
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494, chan->push);
+	nvkm_wr32(device, 0x611498, 0x00010000);
+	nvkm_wr32(device, 0x61149c, 0x00000001);
+	nvkm_mask(device, 0x610490, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000, 0x00000000);
+	nvkm_wr32(device, 0x610490, 0x01000013);
+
+	/* wait for it to go inactive */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x610490) & 0x80000000))
+			break;
+	) < 0) {
+		nvkm_error(subdev, "core init: %08x\n",
+			   nvkm_rd32(device, 0x610490));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+const struct nv50_disp_dmac_func
+gp104_disp_core_func = {
+	.init = gp104_disp_core_init,
+	.fini = gf119_disp_core_fini,
+	.bind = gf119_disp_dmac_bind,
+};
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_core_oclass = {
+	.base.oclass = GP104_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gp104_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.chid = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
index 876b14549a58..a57f7cef307a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
@@ -36,7 +36,7 @@ gf119_disp_dmac_bind(struct nv50_disp_dmac *chan,
 				 chan->base.chid << 27 | 0x00000001);
 }
 
-static void
+void
 gf119_disp_dmac_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
new file mode 100644
index 000000000000..ad24c2c57696
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+static int
+gp104_disp_dmac_init(struct nv50_disp_dmac *chan)
+{
+	struct nv50_disp *disp = chan->base.root->disp;
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int chid = chan->base.chid;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494 + (chid * 0x0010), chan->push);
+	nvkm_wr32(device, 0x611498 + (chid * 0x0010), 0x00010000);
+	nvkm_wr32(device, 0x61149c + (chid * 0x0010), 0x00000001);
+	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
+	nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013);
+
+	/* wait for it to go inactive */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000))
+			break;
+	) < 0) {
+		nvkm_error(subdev, "ch %d init: %08x\n", chid,
+			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+const struct nv50_disp_dmac_func
+gp104_disp_dmac_func = {
+	.init = gp104_disp_dmac_init,
+	.fini = gf119_disp_dmac_fini,
+	.bind = gf119_disp_dmac_bind,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
index fc84eb8b5c45..43ac05857853 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
@@ -25,8 +25,12 @@ int nv50_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func nv50_disp_core_func;
 
 extern const struct nv50_disp_dmac_func gf119_disp_dmac_func;
+void gf119_disp_dmac_fini(struct nv50_disp_dmac *);
 int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func gf119_disp_core_func;
+void gf119_disp_core_fini(struct nv50_disp_dmac *);
+
+extern const struct nv50_disp_dmac_func gp104_disp_dmac_func;
 
 struct nv50_disp_dmac_oclass {
 	int (*ctor)(const struct nv50_disp_dmac_func *,
@@ -88,4 +92,10 @@ extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass;
 extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass;
 
 extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp104_disp_core_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_base_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_ovly_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index f0314664349c..29e84b241cca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -79,8 +79,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -155,25 +154,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	switch (outp->info.type) {
 	case DCB_OUTPUT_TMDS:
-		*conf = (ctrl & 0x00000f00) >> 8;
 		if (*conf == 5)
 			*conf |= 0x0100;
 		break;
 	case DCB_OUTPUT_LVDS:
-		*conf = disp->sor.lvdsconf;
+		*conf |= disp->sor.lvdsconf;
 		break;
-	case DCB_OUTPUT_DP:
-		*conf = (ctrl & 0x00000f00) >> 8;
-		break;
-	case DCB_OUTPUT_ANALOG:
 	default:
-		*conf = 0x00ff;
 		break;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -418,7 +413,7 @@ gf119_disp_intr_supervisor(struct work_struct *work)
 	nvkm_wr32(device, 0x6101d0, 0x80000000);
 }
 
-static void
+void
 gf119_disp_intr_error(struct nv50_disp *disp, int chid)
 {
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
@@ -466,7 +461,7 @@ gf119_disp_intr(struct nv50_disp *disp)
 		u32 stat = nvkm_rd32(device, 0x61009c);
 		int chid = ffs(stat) - 1;
 		if (chid >= 0)
-			gf119_disp_intr_error(disp, chid);
+			disp->func->intr_error(disp, chid);
 		intr &= ~0x00000002;
 	}
 
@@ -510,6 +505,7 @@ gf119_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device,
 static const struct nv50_disp_func
 gf119_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gf119_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
index a86384b8e388..37f145cf30d7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk104_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk104_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
index 0d574c7e594a..e14ac946608c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk110_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk110_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index b6944142d616..2f2437cc5891 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm107_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm107_disp_root_oclass,
@@ -36,7 +37,7 @@ gm107_disp = {
 	.outp.internal.crt = nv50_dac_output_new,
 	.outp.internal.tmds = nv50_sor_output_new,
 	.outp.internal.lvds = nv50_sor_output_new,
-	.outp.internal.dp = gf119_sor_dp_new,
+	.outp.internal.dp = gm107_sor_dp_new,
 	.dac.nr = 3,
 	.dac.power = nv50_dac_power,
 	.dac.sense = nv50_dac_sense,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 67eec8620719..9f368d4ee61e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm200_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm200_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
new file mode 100644
index 000000000000..4f81bf31435e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+static const struct nv50_disp_func
+gp100_disp = {
+	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
+	.uevent = &gf119_disp_chan_uevent,
+	.super = gf119_disp_intr_supervisor,
+	.root = &gp100_disp_root_oclass,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+};
+
+int
+gp100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp100_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
new file mode 100644
index 000000000000..3bf3380336e4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+static void
+gp104_disp_intr_error(struct nv50_disp *disp, int chid)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mthd = nvkm_rd32(device, 0x6111f0 + (chid * 12));
+	u32 data = nvkm_rd32(device, 0x6111f4 + (chid * 12));
+	u32 unkn = nvkm_rd32(device, 0x6111f8 + (chid * 12));
+
+	nvkm_error(subdev, "chid %d mthd %04x data %08x %08x %08x\n",
+		   chid, (mthd & 0x0000ffc), data, mthd, unkn);
+
+	if (chid < ARRAY_SIZE(disp->chan)) {
+		switch (mthd & 0xffc) {
+		case 0x0080:
+			nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR);
+			break;
+		default:
+			break;
+		}
+	}
+
+	nvkm_wr32(device, 0x61009c, (1 << chid));
+	nvkm_wr32(device, 0x6111f0 + (chid * 12), 0x90000000);
+}
+
+static const struct nv50_disp_func
+gp104_disp = {
+	.intr = gf119_disp_intr,
+	.intr_error = gp104_disp_intr_error,
+	.uevent = &gf119_disp_chan_uevent,
+	.super = gf119_disp_intr_supervisor,
+	.root = &gp104_disp_root_oclass,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+};
+
+int
+gp104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp104_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 4226d2153b9c..fbb8c7dc18fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -32,6 +32,7 @@
 #include <subdev/bios/init.h>
 #include <subdev/bios/pll.h>
 #include <subdev/devinit.h>
+#include <subdev/timer.h>
 
 static const struct nvkm_disp_oclass *
 nv50_disp_root_(struct nvkm_disp *base)
@@ -269,8 +270,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	if (outp->info.location == 0) {
 		switch (outp->info.type) {
 		case DCB_OUTPUT_TMDS:
-			*conf = (ctrl & 0x00000f00) >> 8;
 			if (*conf == 5)
 				*conf |= 0x0100;
 			break;
 		case DCB_OUTPUT_LVDS:
-			*conf = disp->sor.lvdsconf;
+			*conf |= disp->sor.lvdsconf;
 			break;
-		case DCB_OUTPUT_DP:
-			*conf = (ctrl & 0x00000f00) >> 8;
-			break;
-		case DCB_OUTPUT_ANALOG:
 		default:
-			*conf = 0x00ff;
 			break;
 		}
 	} else {
@@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 		pclk = pclk / 2;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -430,6 +426,134 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	return outp;
 }
 
+static bool
+nv50_disp_dptmds_war(struct nvkm_device *device)
+{
+	switch (device->chipset) {
+	case 0x94:
+	case 0x96:
+	case 0x98:
+	case 0xaa:
+	case 0xac:
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static bool
+nv50_disp_dptmds_war_needed(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+	if (nv50_disp_dptmds_war(device) && outp->type == DCB_OUTPUT_TMDS) {
+		switch (nvkm_rd32(device, 0x614300 + soff) & 0x00030000) {
+		case 0x00000000:
+		case 0x00030000:
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;
+
+}
+
+static void
+nv50_disp_dptmds_war_2(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	nvkm_mask(device, 0x00e840, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x614300 + soff, 0x03000000, 0x03000000);
+	nvkm_mask(device, 0x61c10c + soff, 0x00000001, 0x00000001);
+
+	nvkm_mask(device, 0x61c00c + soff, 0x0f000000, 0x00000000);
+	nvkm_mask(device, 0x61c008 + soff, 0xff000000, 0x14000000);
+	nvkm_usec(device, 400, NVKM_DELAY);
+	nvkm_mask(device, 0x61c008 + soff, 0xff000000, 0x00000000);
+	nvkm_mask(device, 0x61c00c + soff, 0x0f000000, 0x01000000);
+
+	if (nvkm_rd32(device, 0x61c004 + soff) & 0x00000001) {
+		u32 seqctl = nvkm_rd32(device, 0x61c030 + soff);
+		u32  pu_pc = seqctl & 0x0000000f;
+		nvkm_wr32(device, 0x61c040 + soff + pu_pc * 4, 0x1f008000);
+	}
+}
+
+static void
+nv50_disp_dptmds_war_3(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+	u32 sorpwr;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	sorpwr = nvkm_rd32(device, 0x61c004 + soff);
+	if (sorpwr & 0x00000001) {
+		u32 seqctl = nvkm_rd32(device, 0x61c030 + soff);
+		u32  pd_pc = (seqctl & 0x00000f00) >> 8;
+		u32  pu_pc =  seqctl & 0x0000000f;
+
+		nvkm_wr32(device, 0x61c040 + soff + pd_pc * 4, 0x1f008000);
+
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + soff) & 0x10000000))
+				break;
+		);
+		nvkm_mask(device, 0x61c004 + soff, 0x80000001, 0x80000000);
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + soff) & 0x10000000))
+				break;
+		);
+
+		nvkm_wr32(device, 0x61c040 + soff + pd_pc * 4, 0x00002000);
+		nvkm_wr32(device, 0x61c040 + soff + pu_pc * 4, 0x1f000000);
+	}
+
+	nvkm_mask(device, 0x61c10c + soff, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x614300 + soff, 0x03000000, 0x00000000);
+
+	if (sorpwr & 0x00000001) {
+		nvkm_mask(device, 0x61c004 + soff, 0x80000001, 0x80000001);
+	}
+}
+
+static void
+nv50_disp_update_sppll1(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	bool used = false;
+	int sor;
+
+	if (!nv50_disp_dptmds_war(device))
+		return;
+
+	for (sor = 0; sor < disp->func->sor.nr; sor++) {
+		u32 clksor = nvkm_rd32(device, 0x614300 + (sor * 0x800));
+		switch (clksor & 0x03000000) {
+		case 0x02000000:
+		case 0x03000000:
+			used = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (used)
+		return;
+
+	nvkm_mask(device, 0x00e840, 0x80000000, 0x00000000);
+}
+
 static void
 nv50_disp_intr_unk10_0(struct nv50_disp *disp, int head)
 {
@@ -683,6 +807,8 @@ nv50_disp_intr_unk20_2(struct nv50_disp *disp, int head)
 
 	nvkm_mask(device, hreg, 0x0000000f, hval);
 	nvkm_mask(device, oreg, mask, oval);
+
+	nv50_disp_dptmds_war_2(disp, &outp->info);
 }
 
 /* If programming a TMDS output on a SOR that can also be configured for
@@ -724,6 +850,7 @@ nv50_disp_intr_unk40_0(struct nv50_disp *disp, int head)
 
 	if (outp->info.location == 0 && outp->info.type == DCB_OUTPUT_TMDS)
 		nv50_disp_intr_unk40_0_tmds(disp, &outp->info);
+	nv50_disp_dptmds_war_3(disp, &outp->info);
 }
 
 void
@@ -771,6 +898,7 @@ nv50_disp_intr_supervisor(struct work_struct *work)
 				continue;
 			nv50_disp_intr_unk40_0(disp, head);
 		}
+		nv50_disp_update_sppll1(disp);
 	}
 
 	nvkm_wr32(device, 0x610030, 0x80000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index aecebd8717e5..1e1de6bfe85a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -68,6 +68,7 @@ struct nv50_disp_func_outp {
 
 struct nv50_disp_func {
 	void (*intr)(struct nv50_disp *);
+	void (*intr_error)(struct nv50_disp *, int chid);
 
 	const struct nvkm_event_func *uevent;
 	void (*super)(struct work_struct *);
@@ -114,4 +115,5 @@ void gf119_disp_vblank_init(struct nv50_disp *, int);
 void gf119_disp_vblank_fini(struct nv50_disp *, int);
 void gf119_disp_intr(struct nv50_disp *);
 void gf119_disp_intr_supervisor(struct work_struct *);
+void gf119_disp_intr_error(struct nv50_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
index e9067ba4e179..4e983f6d7032 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
@@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int);
 int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
 int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
+int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int);
 
-int  gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
-		      struct nvkm_output **);
+int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
+int gm107_sor_dp_pattern(struct nvkm_output_dp *, int);
+
+int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
index 2e2dc0641ef2..2f0220b39f34 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
@@ -80,7 +80,7 @@ gk104_disp_ovly_mthd_base = {
 	}
 };
 
-static const struct nv50_disp_chan_mthd
+const struct nv50_disp_chan_mthd
 gk104_disp_ovly_chan_mthd = {
 	.name = "Overlay",
 	.addr = 0x001000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
new file mode 100644
index 000000000000..97e2dd2d908e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_ovly_oclass = {
+	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_ovly_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gk104_disp_ovly_chan_mthd,
+	.chid = 5,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
new file mode 100644
index 000000000000..ac8fdd728ec6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gp100_disp_root = {
+	.init = gf119_disp_root_init,
+	.fini = gf119_disp_root_fini,
+	.dmac = {
+		&gp100_disp_core_oclass,
+		&gk110_disp_base_oclass,
+		&gk104_disp_ovly_oclass,
+	},
+	.pioc = {
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+};
+
+static int
+gp100_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gp100_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gp100_disp_root_oclass = {
+	.base.oclass = GP100_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = gp100_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
new file mode 100644
index 000000000000..8443e04dc626
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gp104_disp_root = {
+	.init = gf119_disp_root_init,
+	.fini = gf119_disp_root_fini,
+	.dmac = {
+		&gp104_disp_core_oclass,
+		&gp104_disp_base_oclass,
+		&gp104_disp_ovly_oclass,
+	},
+	.pioc = {
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+};
+
+static int
+gp104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gp104_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gp104_disp_root_oclass = {
+	.base.oclass = GP104_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = gp104_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index cb449ed8d92c..ad00f1724b72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -40,4 +40,6 @@ extern const struct nvkm_disp_oclass gk104_disp_root_oclass;
 extern const struct nvkm_disp_oclass gk110_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm107_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp104_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index b4b41b135643..49bd5da194e1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -40,8 +40,8 @@ static int
 gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 loff = gf119_sor_loff(outp);
-	nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
+	const u32 soff = gf119_sor_soff(outp);
+	nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern);
 	return 0;
 }
 
@@ -64,7 +64,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 	return 0;
 }
 
-static int
+int
 gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 		     int ln, int vs, int pe, int pc)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
new file mode 100644
index 000000000000..37790b2617c5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "outpdp.h"
+
+int
+gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+{
+	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+	const u32 soff = outp->base.or * 0x800;
+	const u32 data = 0x01010101 * pattern;
+	if (outp->base.info.sorconf.link & 1)
+		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
+	else
+		nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
+	return 0;
+}
+
+static const struct nvkm_output_dp_func
+gm107_sor_dp_func = {
+	.pattern = gm107_sor_dp_pattern,
+	.lnk_pwr = g94_sor_dp_lnk_pwr,
+	.lnk_ctl = gf119_sor_dp_lnk_ctl,
+	.drv_ctl = gf119_sor_dp_drv_ctl,
+};
+
+int
+gm107_sor_dp_new(struct nvkm_disp *disp, int index,
+		 struct dcb_output *dcbE, struct nvkm_output **poutp)
+{
+	return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index 2cfbef9c344f..c44fa7ea672a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -57,19 +57,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
 }
 
 static int
-gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
-{
-	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 soff = gm200_sor_soff(outp);
-	const u32 data = 0x01010101 * pattern;
-	if (outp->base.info.sorconf.link & 1)
-		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
-	else
-		nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
-	return 0;
-}
-
-static int
 gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
@@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 
 static const struct nvkm_output_dp_func
 gm200_sor_dp_func = {
-	.pattern = gm200_sor_dp_pattern,
+	.pattern = gm107_sor_dp_pattern,
 	.lnk_pwr = gm200_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gm200_sor_dp_drv_ctl,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 65e5d291ecda..98651a43bc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -13,6 +13,7 @@ nvkm-y += nvkm/engine/fifo/gk20a.o
 nvkm-y += nvkm/engine/fifo/gm107.o
 nvkm-y += nvkm/engine/fifo/gm200.o
 nvkm-y += nvkm/engine/fifo/gm20b.o
+nvkm-y += nvkm/engine/fifo/gp100.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
@@ -31,3 +32,4 @@ nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
 nvkm-y += nvkm/engine/fifo/gpfifogk110.o
 nvkm-y += nvkm/engine/fifo/gpfifogm200.o
+nvkm-y += nvkm/engine/fifo/gpfifogp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index e06f4d46f802..230f64e5f731 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -27,4 +27,5 @@ int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
 extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gp100_fifo_gpfifo_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 743f3a189f28..103c0afaaa6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -329,7 +329,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	}
 
 	if (eu == NULL) {
-		enum nvkm_devidx engidx = nvkm_top_fault(device->top, unit);
+		enum nvkm_devidx engidx = nvkm_top_fault(device, unit);
 		if (engidx < NVKM_SUBDEV_NR) {
 			const char *src = nvkm_subdev_name[engidx];
 			char *dst = en;
@@ -589,7 +589,6 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 	struct gk104_fifo *fifo = gk104_fifo(base);
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_top *top = device->top;
 	int engn, runl, pbid, ret, i, j;
 	enum nvkm_devidx engidx;
 	u32 *map;
@@ -608,7 +607,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 
 	/* Determine runlist configuration from topology device info. */
 	i = 0;
-	while ((int)(engidx = nvkm_top_engine(top, i++, &runl, &engn)) >= 0) {
+	while ((int)(engidx = nvkm_top_engine(device, i++, &runl, &engn)) >= 0) {
 		/* Determine which PBDMA handles requests for this engine. */
 		for (j = 0, pbid = -1; j < fifo->pbdma_nr; j++) {
 			if (map[j] & (1 << runl)) {
@@ -617,8 +616,8 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 			}
 		}
 
-		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d\n",
-			   engn, runl, pbid);
+		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d (%s)\n",
+			   engn, runl, pbid, nvkm_subdev_name[engidx]);
 
 		fifo->engine[engn].engine = nvkm_device_engine(device, engidx);
 		fifo->engine[engn].runl = runl;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
new file mode 100644
index 000000000000..eff83f7fb705
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "gk104.h"
+#include "changk104.h"
+
+static const struct nvkm_enum
+gp100_fifo_fault_engine[] = {
+	{ 0x01, "DISPLAY" },
+	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
+	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
+	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
+	{ 0x06, "HOST0" },
+	{ 0x07, "HOST1" },
+	{ 0x08, "HOST2" },
+	{ 0x09, "HOST3" },
+	{ 0x0a, "HOST4" },
+	{ 0x0b, "HOST5" },
+	{ 0x0c, "HOST6" },
+	{ 0x0d, "HOST7" },
+	{ 0x0e, "HOST8" },
+	{ 0x0f, "HOST9" },
+	{ 0x10, "HOST10" },
+	{ 0x13, "PERF" },
+	{ 0x17, "PMU" },
+	{ 0x18, "PTP" },
+	{ 0x1f, "PHYSICAL" },
+	{}
+};
+
+static const struct gk104_fifo_func
+gp100_fifo = {
+	.fault.engine = gp100_fifo_fault_engine,
+	.fault.reason = gk104_fifo_fault_reason,
+	.fault.hubclient = gk104_fifo_fault_hubclient,
+	.fault.gpcclient = gk104_fifo_fault_gpcclient,
+	.chan = {
+		&gp100_fifo_gpfifo_oclass,
+		NULL
+	},
+};
+
+int
+gp100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&gp100_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
new file mode 100644
index 000000000000..1530a9217aea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "changk104.h"
+
+#include <nvif/class.h>
+
+const struct nvkm_fifo_chan_oclass
+gp100_fifo_gpfifo_oclass = {
+	.base.oclass = PASCAL_CHANNEL_GPFIFO_A,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = gk104_fifo_gpfifo_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 290ed0db8047..f1c494182248 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -31,6 +31,7 @@ nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gm107.o
 nvkm-y += nvkm/engine/gr/gm200.o
 nvkm-y += nvkm/engine/gr/gm20b.o
+nvkm-y += nvkm/engine/gr/gp100.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
 nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -48,3 +49,4 @@ nvkm-y += nvkm/engine/gr/ctxgk20a.o
 nvkm-y += nvkm/engine/gr/ctxgm107.o
 nvkm-y += nvkm/engine/gr/ctxgm200.o
 nvkm-y += nvkm/engine/gr/ctxgm20b.o
+nvkm-y += nvkm/engine/gr/ctxgp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index b02d8f50ea6a..bc77eea351a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1240,7 +1240,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -1264,7 +1264,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index ac895edce164..52048b5a5274 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -101,6 +101,8 @@ void gm200_grctx_generate_405b60(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gm20b_grctx;
 
+extern const struct gf100_grctx_func gp100_grctx;
+
 /* context init value lists */
 
 extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index f521de11a299..c925ade5880e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -226,7 +226,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -253,7 +253,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 const struct gf100_grctx_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 9ba337778ef5..c46b3fdf7203 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -950,7 +950,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -979,7 +979,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
 	nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
new file mode 100644
index 000000000000..3d1ae7ddf7dd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "ctxgf100.h"
+
+#include <subdev/fb.h>
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+static void
+gp100_grctx_generate_pagepool(struct gf100_grctx *info)
+{
+	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
+	const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS;
+	const int s = 8;
+	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access);
+	mmio_refn(info, 0x40800c, 0x00000000, s, b);
+	mmio_wr32(info, 0x408010, 0x80000000);
+	mmio_refn(info, 0x419004, 0x00000000, s, b);
+	mmio_wr32(info, 0x419008, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_attrib(struct gf100_grctx *info)
+{
+	struct gf100_gr *gr = info->gr;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	const u32  alpha = grctx->alpha_nr;
+	const u32 attrib = grctx->attrib_nr;
+	const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
+	const u32   size = roundup(gr->tpc_total * pertpc, 0x80);
+	const u32 access = NV_MEM_ACCESS_RW;
+	const int s = 12;
+	const int b = mmio_vram(info, size, (1 << s), access);
+	const int max_batches = 0xffff;
+	u32 ao = 0;
+	u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
+	int gpc, ppc, n = 0;
+
+	mmio_refn(info, 0x418810, 0x80000000, s, b);
+	mmio_refn(info, 0x419848, 0x10000000, s, b);
+	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
+	mmio_refn(info, 0x419b00, 0x00000000, s, b);
+	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
+	mmio_wr32(info, 0x405830, attrib);
+	mmio_wr32(info, 0x40585c, alpha);
+	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 u = 0x418ea0 + (n * 0x04);
+			const u32 o = PPC_UNIT(gpc, ppc, 0);
+			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+				continue;
+			mmio_wr32(info, o + 0xc0, bs);
+			mmio_wr32(info, o + 0xf4, bo);
+			mmio_wr32(info, o + 0xf0, bs);
+			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, o + 0xe4, as);
+			mmio_wr32(info, o + 0xf8, ao);
+			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, u, bs);
+		}
+	}
+
+	mmio_wr32(info, 0x418eec, 0x00000000);
+	mmio_wr32(info, 0x41befc, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_405b60(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
+	u32 dist[TPC_MAX / 4] = {};
+	u32 gpcs[GPC_MAX * 2] = {};
+	u8  tpcnr[GPC_MAX];
+	int tpc, gpc, i;
+
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+
+	/* won't result in the same distribution as the binary driver where
+	 * some of the gpcs have more tpcs than others, but this shall do
+	 * for the moment.  the code for earlier gpus has this issue too.
+	 */
+	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while(!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+		gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
+	}
+
+	for (i = 0; i < dist_nr; i++)
+		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
+	for (i = 0; i < gr->gpc_nr * 2; i++)
+		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
+}
+
+static void
+gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 idle_timeout, tmp;
+	int i;
+
+	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+
+	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
+
+	grctx->pagepool(info);
+	grctx->bundle(info);
+	grctx->attrib(info);
+	grctx->unkn(gr);
+
+	gm200_grctx_generate_tpcid(gr);
+	gf100_grctx_generate_r406028(gr);
+	gk104_grctx_generate_r418bb8(gr);
+
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000000);
+
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+
+	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
+		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
+	nvkm_wr32(device, 0x4041c4, tmp);
+
+	gp100_grctx_generate_405b60(gr);
+
+	gf100_gr_icmd(gr, gr->fuc_bundle);
+	nvkm_wr32(device, 0x404154, idle_timeout);
+	gf100_gr_mthd(gr, gr->fuc_method);
+}
+
+const struct gf100_grctx_func
+gp100_grctx = {
+	.main  = gp100_grctx_generate_main,
+	.unkn  = gk104_grctx_generate_unkn,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x1080,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib = gp100_grctx_generate_attrib,
+	.attrib_nr_max = 0x660,
+	.attrib_nr = 0x440,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 9513badb8220..157919c788e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -949,22 +949,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
 }
 
 static const struct nvkm_enum gf100_mp_warp_error[] = {
-	{ 0x00, "NO_ERROR" },
-	{ 0x01, "STACK_MISMATCH" },
+	{ 0x01, "STACK_ERROR" },
+	{ 0x02, "API_STACK_ERROR" },
+	{ 0x03, "RET_EMPTY_STACK_ERROR" },
+	{ 0x04, "PC_WRAP" },
 	{ 0x05, "MISALIGNED_PC" },
-	{ 0x08, "MISALIGNED_GPR" },
-	{ 0x09, "INVALID_OPCODE" },
-	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
-	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
-	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+	{ 0x06, "PC_OVERFLOW" },
+	{ 0x07, "MISALIGNED_IMMC_ADDR" },
+	{ 0x08, "MISALIGNED_REG" },
+	{ 0x09, "ILLEGAL_INSTR_ENCODING" },
+	{ 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
+	{ 0x0b, "ILLEGAL_INSTR_PARAM" },
+	{ 0x0c, "INVALID_CONST_ADDR" },
+	{ 0x0d, "OOR_REG" },
+	{ 0x0e, "OOR_ADDR" },
+	{ 0x0f, "MISALIGNED_ADDR" },
 	{ 0x10, "INVALID_ADDR_SPACE" },
-	{ 0x11, "INVALID_PARAM" },
+	{ 0x11, "ILLEGAL_INSTR_PARAM2" },
+	{ 0x12, "INVALID_CONST_ADDR_LDC" },
+	{ 0x13, "GEOMETRY_SM_ERROR" },
+	{ 0x14, "DIVERGENT" },
+	{ 0x15, "WARP_EXIT" },
 	{}
 };
 
 static const struct nvkm_bitfield gf100_mp_global_error[] = {
+	{ 0x00000001, "SM_TO_SM_FAULT" },
+	{ 0x00000002, "L1_ERROR" },
 	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
-	{ 0x00000008, "OUT_OF_STACK_SPACE" },
+	{ 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
+	{ 0x00000010, "BPT_INT" },
+	{ 0x00000020, "BPT_PAUSE" },
+	{ 0x00000040, "SINGLE_STEP_COMPLETE" },
+	{ 0x20000000, "ECC_SEC_ERROR" },
+	{ 0x40000000, "ECC_DED_ERROR" },
+	{ 0x80000000, "TIMEOUT" },
 	{}
 };
 
@@ -1438,24 +1457,30 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
 	int i;
+	int ret = 0;
 
 	if (gr->firmware) {
 		/* load fuc microcode */
-		nvkm_mc_unk260(device->mc, 0);
+		nvkm_mc_unk260(device, 0);
 
 		/* securely-managed falcons must be reset using secure boot */
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
 		else
 			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
 					 &gr->fuc409d);
+		if (ret)
+			return ret;
+
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
 		else
 			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
 					 &gr->fuc41ad);
+		if (ret)
+			return ret;
 
-		nvkm_mc_unk260(device->mc, 1);
+		nvkm_mc_unk260(device, 1);
 
 		/* start both of them running */
 		nvkm_wr32(device, 0x409840, 0xffffffff);
@@ -1557,7 +1582,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	}
 
 	/* load HUB microcode */
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 	nvkm_wr32(device, 0x4091c0, 0x01000000);
 	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
 		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
@@ -1580,7 +1605,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 			nvkm_wr32(device, 0x41a188, i >> 6);
 		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
 	}
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
 	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 2b98abdb9270..268b8d60ff73 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -292,4 +292,6 @@ extern const struct gf100_gr_init gm107_gr_init_l1c_0[];
 extern const struct gf100_gr_init gm107_gr_init_wwdx_0[];
 extern const struct gf100_gr_init gm107_gr_init_cbm_0[];
 void gm107_gr_init_bios(struct gf100_gr *);
+
+void gm200_gr_init_gpc_mmu(struct gf100_gr *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 4ca8ed15191c..de8b806b88fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -361,6 +361,5 @@ gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
 	if (ret)
 		return ret;
 
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 4dfa4513bb6c..6435f1257572 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -38,7 +38,7 @@ gm200_gr_rops(struct gf100_gr *gr)
 	return nvkm_rd32(gr->base.engine.subdev.device, 0x12006c);
 }
 
-static void
+void
 gm200_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
new file mode 100644
index 000000000000..26ad79def0ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+/*******************************************************************************
+ * PGRAPH engine/subdev functions
+ ******************************************************************************/
+
+static void
+gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */
+	const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f;
+	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
+}
+
+static int
+gp100_gr_init(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+	u32 data[TPC_MAX / 8] = {};
+	u8  tpcnr[GPC_MAX];
+	int gpc, tpc, rop;
+	int i;
+
+	gr->func->init_gpc_mmu(gr);
+
+	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
+
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
+
+	memset(data, 0x00, sizeof(data));
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+							 gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+
+	gr->func->init_rop_active_fbps(gr);
+
+	nvkm_wr32(device, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000f0002);
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+	nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
+	nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
+
+	gr->func->init_ppc_exceptions(gr);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
+		}
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+	}
+
+	for (rop = 0; rop < gr->rop_nr; rop++) {
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
+
+	gf100_gr_zbc_init(gr);
+
+	return gf100_gr_init_ctxctl(gr);
+}
+
+static const struct gf100_gr_func
+gp100_gr = {
+	.init = gp100_gr_init,
+	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.rops = gm200_gr_rops,
+	.ppc_nr = 2,
+	.grctx = &gp100_grctx,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, PASCAL_A, &gf100_fermi },
+		{ -1, -1, PASCAL_COMPUTE_A },
+		{}
+	}
+};
+
+int
+gp100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gp100_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
index e15b9627b07e..f3c30b2a788e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
@@ -26,6 +26,49 @@
 #include <subdev/bios.h>
 #include <subdev/bios/bmp.h>
 #include <subdev/bios/bit.h>
+#include <subdev/bios/image.h>
+
+static bool
+nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
+{
+	u32 p = *addr;
+
+	if (*addr > bios->image0_size && bios->imaged_addr) {
+		*addr -= bios->image0_size;
+		*addr += bios->imaged_addr;
+	}
+
+	if (unlikely(*addr + size >= bios->size)) {
+		nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
+		return false;
+	}
+
+	return true;
+}
+
+u8
+nvbios_rd08(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 1)))
+		return bios->data[addr];
+	return 0x00;
+}
+
+u16
+nvbios_rd16(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 2)))
+		return get_unaligned_le16(&bios->data[addr]);
+	return 0x0000;
+}
+
+u32
+nvbios_rd32(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 4)))
+		return get_unaligned_le32(&bios->data[addr]);
+	return 0x00000000;
+}
 
 u8
 nvbios_checksum(const u8 *data, int size)
@@ -100,8 +143,9 @@ int
 nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 {
 	struct nvkm_bios *bios;
+	struct nvbios_image image;
 	struct bit_entry bit_i;
-	int ret;
+	int ret, idx = 0;
 
 	if (!(bios = *pbios = kzalloc(sizeof(*bios), GFP_KERNEL)))
 		return -ENOMEM;
@@ -111,6 +155,19 @@ nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 	if (ret)
 		return ret;
 
+	/* Some tables have weird pointers that need adjustment before
+	 * they're dereferenced.  I'm not entirely sure why...
+	 */
+	if (nvbios_image(bios, idx++, &image)) {
+		bios->image0_size = image.size;
+		while (nvbios_image(bios, idx++, &image)) {
+			if (image.type == 0xe0) {
+				bios->imaged_addr = image.base;
+				break;
+			}
+		}
+	}
+
 	/* detect type of vbios we're dealing with */
 	bios->bmp_offset = nvbios_findstr(bios->data, bios->size,
 					  "\xff\x7f""NV\0", 5);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
index a5e92135cd77..9efb1b48cd54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
@@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 {
 	u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len);
 	if (data) {
-		info->match     = nvbios_rd16(bios, data + 0x00);
+		info->proto     = nvbios_rd08(bios, data + 0x00);
+		info->flags     = nvbios_rd16(bios, data + 0x01);
 		info->clkcmp[0] = nvbios_rd16(bios, data + 0x02);
 		info->clkcmp[1] = nvbios_rd16(bios, data + 0x04);
 	}
@@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 }
 
 u16
-nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type,
+nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags,
 		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info)
 {
 	u16 data, idx = 0;
 	while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) {
-		if (info->match == type)
+		if ((info->proto == proto || info->proto == 0xff) &&
+		    (info->flags == flags))
 			break;
 	}
 	return data;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 05332476354a..d89e78c4e689 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -40,6 +40,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 				case 0x30:
 				case 0x40:
 				case 0x41:
+				case 0x42:
 					*hdr = nvbios_rd08(bios, data + 0x01);
 					*len = nvbios_rd08(bios, data + 0x02);
 					*cnt = nvbios_rd08(bios, data + 0x03);
@@ -70,6 +71,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			*hdr = nvbios_rd08(bios, data + 0x04);
 			*cnt = 0;
 			*len = 0;
@@ -109,6 +111,7 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			info->flags     = nvbios_rd08(bios, data + 0x04);
 			info->script[0] = nvbios_rd16(bios, data + 0x05);
 			info->script[1] = nvbios_rd16(bios, data + 0x07);
@@ -180,6 +183,11 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 			info->pe    = nvbios_rd08(bios, data + 0x02);
 			info->tx_pu = nvbios_rd08(bios, data + 0x03);
 			break;
+		case 0x42:
+			info->dc    = nvbios_rd08(bios, data + 0x00);
+			info->pe    = nvbios_rd08(bios, data + 0x01);
+			info->tx_pu = nvbios_rd08(bios, data + 0x02);
+			break;
 		default:
 			data = 0x0000;
 			break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
index 74b14cf09308..1dbff7aeafec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
@@ -68,11 +68,16 @@ nvbios_imagen(struct nvkm_bios *bios, struct nvbios_image *image)
 bool
 nvbios_image(struct nvkm_bios *bios, int idx, struct nvbios_image *image)
 {
+	u32 imaged_addr = bios->imaged_addr;
 	memset(image, 0x00, sizeof(*image));
+	bios->imaged_addr = 0;
 	do {
 		image->base += image->size;
-		if (image->last || !nvbios_imagen(bios, image))
+		if (image->last || !nvbios_imagen(bios, image)) {
+			bios->imaged_addr = imaged_addr;
 			return false;
+		}
 	} while(idx--);
+	bios->imaged_addr = imaged_addr;
 	return true;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
index 91a7dc56e406..2ca23a9157ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
@@ -77,15 +77,17 @@ g84_pll_mapping[] = {
 	{}
 };
 
-static u16
+static u32
 pll_limits_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_C;
-	u16 data = 0x0000;
+	u32 data = 0x0000;
 
 	if (!bit_entry(bios, 'C', &bit_C)) {
 		if (bit_C.version == 1 && bit_C.length >= 10)
 			data = nvbios_rd16(bios, bit_C.offset + 8);
+		if (bit_C.version == 2 && bit_C.length >= 4)
+			data = nvbios_rd32(bios, bit_C.offset + 0);
 		if (data) {
 			*ver = nvbios_rd08(bios, data + 0);
 			*hdr = nvbios_rd08(bios, data + 1);
@@ -137,12 +139,12 @@ pll_map(struct nvkm_bios *bios)
 	}
 }
 
-static u16
+static u32
 pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -160,7 +162,7 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->reg == reg && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*type = map->type;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -179,12 +181,12 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	return 0x0000;
 }
 
-static u16
+static u32
 pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -202,7 +204,7 @@ pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->type == type && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*reg = map->reg;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -228,7 +230,7 @@ nvbios_pll_parse(struct nvkm_bios *bios, u32 type, struct nvbios_pll *info)
 	struct nvkm_device *device = subdev->device;
 	u8  ver, len;
 	u32 reg = type;
-	u16 data;
+	u32 data;
 
 	if (type > PLL_MAX) {
 		reg  = type;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
index c268e5afe852..b4a308f3cf7b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
@@ -26,21 +26,6 @@
 #include <subdev/bios/image.h>
 #include <subdev/bios/pmu.h>
 
-static u32
-weirdo_pointer(struct nvkm_bios *bios, u32 data)
-{
-	struct nvbios_image image;
-	int idx = 0;
-	if (nvbios_image(bios, idx++, &image)) {
-		data -= image.size;
-		while (nvbios_image(bios, idx++, &image)) {
-			if (image.type == 0xe0)
-				return image.base + data;
-		}
-	}
-	return 0;
-}
-
 u32
 nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
@@ -50,7 +35,7 @@ nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 	if (!bit_entry(bios, 'p', &bit_p)) {
 		if (bit_p.version == 2 && bit_p.length >= 4)
 			data = nvbios_rd32(bios, bit_p.offset + 0x00);
-		if ((data = weirdo_pointer(bios, data))) {
+		if (data) {
 			*ver = nvbios_rd08(bios, data + 0x00); /* maybe? */
 			*hdr = nvbios_rd08(bios, data + 0x01);
 			*len = nvbios_rd08(bios, data + 0x02);
@@ -97,8 +82,7 @@ nvbios_pmuRm(struct nvkm_bios *bios, u8 type, struct nvbios_pmuR *info)
 	u32 data;
 	memset(info, 0x00, sizeof(*info));
 	while ((data = nvbios_pmuEp(bios, idx++, &ver, &hdr, &pmuE))) {
-		if ( pmuE.type == type &&
-		    (data = weirdo_pointer(bios, pmuE.data))) {
+		if (pmuE.type == type && (data = pmuE.data)) {
 			info->init_addr_pmu = nvbios_rd32(bios, data + 0x08);
 			info->args_addr_pmu = nvbios_rd32(bios, data + 0x0c);
 			info->boot_addr     = data + 0x30;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index d0ae7454764e..b57c370c725d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -30,11 +30,11 @@ nvbios_rammapTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr,
 		u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 rammap = 0x0000;
+	u32 rammap = 0x0000;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			rammap = nvbios_rd16(bios, bit_P.offset + 4);
+			rammap = nvbios_rd32(bios, bit_P.offset + 4);
 
 		if (rammap) {
 			*ver = nvbios_rd08(bios, rammap + 0);
@@ -61,7 +61,7 @@ nvbios_rammapEe(struct nvkm_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (rammap && idx < *cnt) {
 		rammap = rammap + *hdr + (idx * (*len + (snr * ssz)));
 		*hdr = *len;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
index 78c449b417b7..89d5543118cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
@@ -99,7 +99,7 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 	u32 ssrc = nvkm_rd32(device, dsrc + (doff * 4));
-	u32 sctl = nvkm_rd32(device, dctl + (doff * 4));
+	u32 sclk, sctl, sdiv = 2;
 
 	switch (ssrc & 0x00000003) {
 	case 0:
@@ -109,13 +109,21 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 	case 2:
 		return 100000;
 	case 3:
-		if (sctl & 0x80000000) {
-			u32 sclk = read_vco(clk, dsrc + (doff * 4));
-			u32 sdiv = (sctl & 0x0000003f) + 2;
-			return (sclk * 2) / sdiv;
+		sclk = read_vco(clk, dsrc + (doff * 4));
+
+		/* Memclk has doff of 0 despite its alt. location */
+		if (doff <= 2) {
+			sctl = nvkm_rd32(device, dctl + (doff * 4));
+
+			if (sctl & 0x80000000) {
+				if (ssrc & 0x100)
+					sctl >>= 8;
+
+				sdiv = (sctl & 0x3f) + 2;
+			}
 		}
 
-		return read_vco(clk, dsrc + (doff * 4));
+		return (sclk * 2) / sdiv;
 	default:
 		return 0;
 	}
@@ -366,11 +374,17 @@ gf100_clk_prog_2(struct gf100_clk *clk, int idx)
 		if (info->coef) {
 			nvkm_wr32(device, addr + 0x04, info->coef);
 			nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+			/* Test PLL lock */
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 			nvkm_msec(device, 2000,
 				if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 					break;
 			);
-			nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+			/* Enable sync mode */
+			nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
index 975c401bccab..06bc0d2d6ae1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
@@ -393,11 +393,17 @@ gk104_clk_prog_2(struct gk104_clk *clk, int idx)
 	if (info->coef) {
 		nvkm_wr32(device, addr + 0x04, info->coef);
 		nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+		/* Test PLL lock */
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 		nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 				break;
 		);
-		nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+		/* Enable sync mode */
+		nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
index 5f0ee24e31b8..218893e3e5f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -28,69 +28,6 @@
 #include <core/tegra.h>
 #include <subdev/timer.h>
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
-
-#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
-#define GPCPLL_CFG_ENABLE	BIT(0)
-#define GPCPLL_CFG_IDDQ		BIT(1)
-#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
-#define GPCPLL_CFG_LOCK		BIT(17)
-
-#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
-#define GPCPLL_COEFF_M_SHIFT	0
-#define GPCPLL_COEFF_M_WIDTH	8
-#define GPCPLL_COEFF_N_SHIFT	8
-#define GPCPLL_COEFF_N_WIDTH	8
-#define GPCPLL_COEFF_P_SHIFT	16
-#define GPCPLL_COEFF_P_WIDTH	6
-
-#define GPCPLL_CFG2			(SYS_GPCPLL_CFG_BASE + 0xc)
-#define GPCPLL_CFG2_SETUP2_SHIFT	16
-#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
-
-#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
-#define GPCPLL_CFG3_PLL_STEPB_SHIFT	16
-
-#define GPC_BCASE_GPCPLL_CFG_BASE		0x00132800
-#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
-#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
-#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
-#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
-
-#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
-#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
-
-#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
-#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
-#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
-#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
-#define GPC2CLK_OUT_VCODIV_WIDTH	6
-#define GPC2CLK_OUT_VCODIV_SHIFT	8
-#define GPC2CLK_OUT_VCODIV1		0
-#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
-					GPC2CLK_OUT_VCODIV_SHIFT)
-#define GPC2CLK_OUT_BYPDIV_WIDTH	6
-#define GPC2CLK_OUT_BYPDIV_SHIFT	0
-#define GPC2CLK_OUT_BYPDIV31		0x3c
-#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
-		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
-		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
-#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
-		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
-		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
-
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCASE_GPCPLL_CFG_BASE + 0xa0)
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
-	    (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
-
 static const u8 _pl_to_div[] = {
 /* PL:   0, 1, 2, 3, 4, 5, 6,  7,  8,  9, 10, 11, 12, 13, 14 */
 /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32,
@@ -124,7 +61,7 @@ static const struct gk20a_clk_pllg_params gk20a_pllg_params = {
 	.min_pl = 1, .max_pl = 32,
 };
 
-static void
+void
 gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
@@ -136,20 +73,33 @@ gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 	pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
 }
 
-static u32
-gk20a_pllg_calc_rate(struct gk20a_clk *clk)
+void
+gk20a_pllg_write_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
+
+	val = (pll->m & MASK(GPCPLL_COEFF_M_WIDTH)) << GPCPLL_COEFF_M_SHIFT;
+	val |= (pll->n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	val |= (pll->pl & MASK(GPCPLL_COEFF_P_WIDTH)) << GPCPLL_COEFF_P_SHIFT;
+	nvkm_wr32(device, GPCPLL_COEFF, val);
+}
+
+u32
+gk20a_pllg_calc_rate(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	u32 rate;
 	u32 divider;
 
-	rate = clk->parent_rate * clk->pll.n;
-	divider = clk->pll.m * clk->pl_to_div(clk->pll.pl);
+	rate = clk->parent_rate * pll->n;
+	divider = pll->m * clk->pl_to_div(pll->pl);
 
 	return rate / divider / 2;
 }
 
-static int
-gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
+int
+gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate,
+		    struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	u32 target_clk_f, ref_clk_f, target_freq;
@@ -163,16 +113,13 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 	target_clk_f = rate * 2 / KHZ;
 	ref_clk_f = clk->parent_rate / KHZ;
 
-	max_vco_f = clk->params->max_vco;
+	target_vco_f = target_clk_f + target_clk_f / 50;
+	max_vco_f = max(clk->params->max_vco, target_vco_f);
 	min_vco_f = clk->params->min_vco;
 	best_m = clk->params->max_m;
 	best_n = clk->params->min_n;
 	best_pl = clk->params->min_pl;
 
-	target_vco_f = target_clk_f + target_clk_f / 50;
-	if (max_vco_f < target_vco_f)
-		max_vco_f = target_vco_f;
-
 	/* min_pl <= high_pl <= max_pl */
 	high_pl = (max_vco_f + target_vco_f - 1) / target_vco_f;
 	high_pl = min(high_pl, clk->params->max_pl);
@@ -195,9 +142,7 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 		target_vco_f = target_clk_f * clk->pl_to_div(pl);
 
 		for (m = clk->params->min_m; m <= clk->params->max_m; m++) {
-			u32 u_f, vco_f;
-
-			u_f = ref_clk_f / m;
+			u32 u_f = ref_clk_f / m;
 
 			if (u_f < clk->params->min_u)
 				break;
@@ -211,6 +156,8 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 				break;
 
 			for (; n <= n2; n++) {
+				u32 vco_f;
+
 				if (n < clk->params->min_n)
 					continue;
 				if (n > clk->params->max_n)
@@ -247,16 +194,16 @@ found_match:
 			   "no best match for target @ %dMHz on gpc_pll",
 			   target_clk_f / KHZ);
 
-	clk->pll.m = best_m;
-	clk->pll.n = best_n;
-	clk->pll.pl = best_pl;
+	pll->m = best_m;
+	pll->n = best_n;
+	pll->pl = best_pl;
 
-	target_freq = gk20a_pllg_calc_rate(clk);
+	target_freq = gk20a_pllg_calc_rate(clk, pll);
 
 	nvkm_debug(subdev,
-		   "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n",
-		   target_freq / MHZ, clk->pll.m, clk->pll.n, clk->pll.pl,
-		   clk->pl_to_div(clk->pll.pl));
+		   "actual target freq %d KHz, M %d, N %d, PL %d(div%d)\n",
+		   target_freq / KHZ, pll->m, pll->n, pll->pl,
+		   clk->pl_to_div(pll->pl));
 	return 0;
 }
 
@@ -265,45 +212,36 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val;
-	int ramp_timeout;
+	struct gk20a_pll pll;
+	int ret = 0;
 
 	/* get old coefficients */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
+	gk20a_pllg_read_mnp(clk, &pll);
 	/* do nothing if NDIV is the same */
-	if (n == ((val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH)))
+	if (n == pll.n)
 		return 0;
 
-	/* setup */
-	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
-		0x2b << GPCPLL_CFG2_PLL_STEPA_SHIFT);
-	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
-		0xb << GPCPLL_CFG3_PLL_STEPB_SHIFT);
-
 	/* pll slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
 
 	/* new ndiv ready for ramp */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
-	val &= ~(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT);
-	val |= (n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	pll.n = n;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, &pll);
 
 	/* dynamic ramp to new ndiv */
-	val = nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
-	val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_NDIV_SLOWDOWN, val);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
 
-	for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) {
-		udelay(1);
-		val = nvkm_rd32(device, GPC_BCAST_NDIV_SLOWDOWN_DEBUG);
-		if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK)
-			break;
-	}
+	/* wait for ramping to complete */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
 
 	/* exit slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
@@ -311,21 +249,35 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
 	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
 
-	if (ramp_timeout <= 0) {
-		nvkm_error(subdev, "gpcpll dynamic ramp timeout\n");
-		return -ETIMEDOUT;
-	}
-
-	return 0;
+	return ret;
 }
 
-static void
+static int
 gk20a_pllg_enable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
 
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
 	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* enable lock detection */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
+		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+	}
+
+	/* wait for lock */
+	if (nvkm_wait_usec(device, 300, GPCPLL_CFG, GPCPLL_CFG_LOCK,
+			   GPCPLL_CFG_LOCK) < 0)
+		return -ETIMEDOUT;
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
 }
 
 static void
@@ -333,117 +285,81 @@ gk20a_pllg_disable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
 	nvkm_rd32(device, GPCPLL_CFG);
 }
 
 static int
-_gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
+gk20a_pllg_program_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val, cfg;
-	struct gk20a_pll old_pll;
-	u32 n_lo;
-
-	/* get old coefficients */
-	gk20a_pllg_read_mnp(clk, &old_pll);
-
-	/* do NDIV slide if there is no change in M and PL */
-	cfg = nvkm_rd32(device, GPCPLL_CFG);
-	if (allow_slide && clk->pll.m == old_pll.m &&
-	    clk->pll.pl == old_pll.pl && (cfg & GPCPLL_CFG_ENABLE)) {
-		return gk20a_pllg_slide(clk, clk->pll.n);
-	}
-
-	/* slide down to NDIV_LO */
-	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) {
-		int ret;
-
-		n_lo = DIV_ROUND_UP(old_pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
-		ret = gk20a_pllg_slide(clk, n_lo);
+	struct gk20a_pll cur_pll;
+	int ret;
 
-		if (ret)
-			return ret;
-	}
+	gk20a_pllg_read_mnp(clk, &cur_pll);
 
-	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
-		0x2 << GPC2CLK_OUT_VCODIV_SHIFT);
-
-	/* put PLL in bypass before programming it */
-	val = nvkm_rd32(device, SEL_VCO);
-	val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 	udelay(2);
-	nvkm_wr32(device, SEL_VCO, val);
-
-	/* get out from IDDQ */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_IDDQ) {
-		val &= ~GPCPLL_CFG_IDDQ;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-		nvkm_rd32(device, GPCPLL_CFG);
-		udelay(2);
-	}
 
 	gk20a_pllg_disable(clk);
 
-	nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__,
-		   clk->pll.m, clk->pll.n, clk->pll.pl);
-
-	n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco,
-			    clk->parent_rate / KHZ);
-	val = clk->pll.m << GPCPLL_COEFF_M_SHIFT;
-	val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT;
-	val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT;
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, pll);
 
-	gk20a_pllg_enable(clk);
-
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
-		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-	}
-
-	if (nvkm_usec(device, 300,
-		if (nvkm_rd32(device, GPCPLL_CFG) & GPCPLL_CFG_LOCK)
-			break;
-	) < 0)
-		return -ETIMEDOUT;
-
-	/* switch to VCO mode */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
-		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	ret = gk20a_pllg_enable(clk);
+	if (ret)
+		return ret;
 
 	/* restore out divider 1:1 */
-	val = nvkm_rd32(device, GPC2CLK_OUT);
-	if ((val & GPC2CLK_OUT_VCODIV_MASK) !=
-	    (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) {
-		val &= ~GPC2CLK_OUT_VCODIV_MASK;
-		val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT;
-		udelay(2);
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		/* Intentional 2nd write to assure linear divider operation */
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		nvkm_rd32(device, GPC2CLK_OUT);
-	}
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 
-	/* slide up to new NDIV */
-	return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0;
+	return 0;
 }
 
 static int
-gk20a_pllg_program_mnp(struct gk20a_clk *clk)
+gk20a_pllg_program_mnp_slide(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
-	int err;
+	struct gk20a_pll cur_pll;
+	int ret;
 
-	err = _gk20a_pllg_program_mnp(clk, true);
-	if (err)
-		err = _gk20a_pllg_program_mnp(clk, false);
+	if (gk20a_pllg_is_enabled(clk)) {
+		gk20a_pllg_read_mnp(clk, &cur_pll);
+
+		/* just do NDIV slide if there is no change to M and PL */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gk20a_pllg_slide(clk, pll->n);
+
+		/* slide down to current NDIV_LO */
+		cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+		ret = gk20a_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program MNP with the new clock parameters and new NDIV_LO */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+	ret = gk20a_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
 
-	return err;
+	/* slide up to new NDIV */
+	return gk20a_pllg_slide(clk, pll->n);
 }
 
 static struct nvkm_pstate
@@ -546,13 +462,14 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	struct gk20a_clk *clk = gk20a_clk(base);
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
+	struct gk20a_pll pll;
 
 	switch (src) {
 	case nv_clk_src_crystal:
 		return device->crystal;
 	case nv_clk_src_gpc:
-		gk20a_pllg_read_mnp(clk, &clk->pll);
-		return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV;
+		gk20a_pllg_read_mnp(clk, &pll);
+		return gk20a_pllg_calc_rate(clk, &pll) / GK20A_CLK_GPC_MDIV;
 	default:
 		nvkm_error(subdev, "invalid clock source %d\n", src);
 		return -EINVAL;
@@ -565,15 +482,20 @@ gk20a_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
 	struct gk20a_clk *clk = gk20a_clk(base);
 
 	return gk20a_pllg_calc_mnp(clk, cstate->domain[nv_clk_src_gpc] *
-					 GK20A_CLK_GPC_MDIV);
+					 GK20A_CLK_GPC_MDIV, &clk->pll);
 }
 
 int
 gk20a_clk_prog(struct nvkm_clk *base)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
+	int ret;
+
+	ret = gk20a_pllg_program_mnp_slide(clk, &clk->pll);
+	if (ret)
+		ret = gk20a_pllg_program_mnp(clk, &clk->pll);
 
-	return gk20a_pllg_program_mnp(clk);
+	return ret;
 }
 
 void
@@ -581,29 +503,62 @@ gk20a_clk_tidy(struct nvkm_clk *base)
 {
 }
 
+int
+gk20a_clk_setup_slide(struct gk20a_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 step_a, step_b;
+
+	switch (clk->parent_rate) {
+	case 12000000:
+	case 12800000:
+	case 13000000:
+		step_a = 0x2b;
+		step_b = 0x0b;
+		break;
+	case 19200000:
+		step_a = 0x12;
+		step_b = 0x08;
+		break;
+	case 38400000:
+		step_a = 0x04;
+		step_b = 0x05;
+		break;
+	default:
+		nvkm_error(subdev, "invalid parent clock rate %u KHz",
+			   clk->parent_rate / KHZ);
+		return -EINVAL;
+	}
+
+	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
+		step_a << GPCPLL_CFG2_PLL_STEPA_SHIFT);
+	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
+		step_b << GPCPLL_CFG3_PLL_STEPB_SHIFT);
+
+	return 0;
+}
+
 void
 gk20a_clk_fini(struct nvkm_clk *base)
 {
 	struct nvkm_device *device = base->subdev.device;
 	struct gk20a_clk *clk = gk20a_clk(base);
-	u32 val;
 
 	/* slide to VCO min */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_ENABLE) {
+	if (gk20a_pllg_is_enabled(clk)) {
 		struct gk20a_pll pll;
 		u32 n_lo;
 
 		gk20a_pllg_read_mnp(clk, &pll);
-		n_lo = DIV_ROUND_UP(pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
+		n_lo = gk20a_pllg_n_lo(clk, &pll);
 		gk20a_pllg_slide(clk, n_lo);
 	}
 
-	/* put PLL in bypass before disabling it */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
-
 	gk20a_pllg_disable(clk);
+
+	/* set IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 1);
 }
 
 static int
@@ -614,9 +569,18 @@ gk20a_clk_init(struct nvkm_clk *base)
 	struct nvkm_device *device = subdev->device;
 	int ret;
 
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
 		  GPC2CLK_OUT_INIT_VAL);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
 	ret = base->func->prog(&clk->base);
@@ -646,7 +610,7 @@ gk20a_clk = {
 };
 
 int
-_gk20a_clk_ctor(struct nvkm_device *device, int index,
+gk20a_clk_ctor(struct nvkm_device *device, int index,
 		const struct nvkm_clk_func *func,
 		const struct gk20a_clk_pllg_params *params,
 		struct gk20a_clk *clk)
@@ -685,7 +649,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
+	ret = gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
 			      clk);
 
 	clk->pl_to_div = pl_to_div;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
index 13c46740197d..0d1450972162 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
@@ -24,9 +24,79 @@
 #ifndef __NVKM_CLK_GK20A_H__
 #define __NVKM_CLK_GK20A_H__
 
+#define KHZ (1000)
+#define MHZ (KHZ * 1000)
+
+#define MASK(w)	((1 << (w)) - 1)
+
 #define GK20A_CLK_GPC_MDIV 1000
 
 #define SYS_GPCPLL_CFG_BASE	0x00137000
+#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
+#define GPCPLL_CFG_ENABLE	BIT(0)
+#define GPCPLL_CFG_IDDQ		BIT(1)
+#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
+#define GPCPLL_CFG_LOCK		BIT(17)
+
+#define GPCPLL_CFG2		(SYS_GPCPLL_CFG_BASE + 0xc)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
+#define GPCPLL_CFG3_VCO_CTRL_SHIFT		0
+#define GPCPLL_CFG3_VCO_CTRL_WIDTH		9
+#define GPCPLL_CFG3_VCO_CTRL_MASK		\
+	(MASK(GPCPLL_CFG3_VCO_CTRL_WIDTH) << GPCPLL_CFG3_VCO_CTRL_SHIFT)
+#define GPCPLL_CFG3_PLL_STEPB_SHIFT		16
+#define GPCPLL_CFG3_PLL_STEPB_WIDTH		8
+
+#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
+#define GPCPLL_COEFF_M_SHIFT	0
+#define GPCPLL_COEFF_M_WIDTH	8
+#define GPCPLL_COEFF_N_SHIFT	8
+#define GPCPLL_COEFF_N_WIDTH	8
+#define GPCPLL_COEFF_N_MASK	\
+	(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT)
+#define GPCPLL_COEFF_P_SHIFT	16
+#define GPCPLL_COEFF_P_WIDTH	6
+
+#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
+#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
+#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
+#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
+
+#define GPC_BCAST_GPCPLL_CFG_BASE		0x00132800
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCAST_GPCPLL_CFG_BASE + 0xa0)
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
+	(0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
+
+#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
+#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
+
+#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
+#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
+#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
+#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
+#define GPC2CLK_OUT_VCODIV_WIDTH	6
+#define GPC2CLK_OUT_VCODIV_SHIFT	8
+#define GPC2CLK_OUT_VCODIV1		0
+#define GPC2CLK_OUT_VCODIV2		2
+#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
+					GPC2CLK_OUT_VCODIV_SHIFT)
+#define GPC2CLK_OUT_BYPDIV_WIDTH	6
+#define GPC2CLK_OUT_BYPDIV_SHIFT	0
+#define GPC2CLK_OUT_BYPDIV31		0x3c
+#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
+		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
+		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
+#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
+		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
+		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
 
 /* All frequencies in Khz */
 struct gk20a_clk_pllg_params {
@@ -54,7 +124,29 @@ struct gk20a_clk {
 };
 #define gk20a_clk(p) container_of((p), struct gk20a_clk, base)
 
-int _gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
+u32 gk20a_pllg_calc_rate(struct gk20a_clk *, struct gk20a_pll *);
+int gk20a_pllg_calc_mnp(struct gk20a_clk *, unsigned long, struct gk20a_pll *);
+void gk20a_pllg_read_mnp(struct gk20a_clk *, struct gk20a_pll *);
+void gk20a_pllg_write_mnp(struct gk20a_clk *, const struct gk20a_pll *);
+
+static inline bool
+gk20a_pllg_is_enabled(struct gk20a_clk *clk)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
+
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	return val & GPCPLL_CFG_ENABLE;
+}
+
+static inline u32
+gk20a_pllg_n_lo(struct gk20a_clk *clk, struct gk20a_pll *pll)
+{
+	return DIV_ROUND_UP(pll->m * clk->params->min_vco,
+			    clk->parent_rate / KHZ);
+}
+
+int gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
 		    const struct gk20a_clk_pllg_params *, struct gk20a_clk *);
 void gk20a_clk_fini(struct nvkm_clk *);
 int gk20a_clk_read(struct nvkm_clk *, enum nv_clk_src);
@@ -62,4 +154,6 @@ int gk20a_clk_calc(struct nvkm_clk *, struct nvkm_cstate *);
 int gk20a_clk_prog(struct nvkm_clk *);
 void gk20a_clk_tidy(struct nvkm_clk *);
 
+int gk20a_clk_setup_slide(struct gk20a_clk *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
index 71b2bbb61973..b284e949f732 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -21,20 +21,123 @@
  */
 
 #include <subdev/clk.h>
+#include <subdev/volt.h>
+#include <subdev/timer.h>
 #include <core/device.h>
+#include <core/tegra.h>
 
 #include "priv.h"
 #include "gk20a.h"
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
+#define GPCPLL_CFG_SYNC_MODE	BIT(2)
 
 #define BYPASSCTRL_SYS	(SYS_GPCPLL_CFG_BASE + 0x340)
 #define BYPASSCTRL_SYS_GPCPLL_SHIFT	0
 #define BYPASSCTRL_SYS_GPCPLL_WIDTH	1
 
+#define GPCPLL_CFG2_SDM_DIN_SHIFT	0
+#define GPCPLL_CFG2_SDM_DIN_WIDTH	8
+#define GPCPLL_CFG2_SDM_DIN_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_WIDTH) << GPCPLL_CFG2_SDM_DIN_SHIFT)
+#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT	8
+#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH	15
+#define GPCPLL_CFG2_SDM_DIN_NEW_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_NEW_WIDTH) << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_DVFS0	(SYS_GPCPLL_CFG_BASE + 0x10)
+#define GPCPLL_DVFS0_DFS_COEFF_SHIFT	0
+#define GPCPLL_DVFS0_DFS_COEFF_WIDTH	7
+#define GPCPLL_DVFS0_DFS_COEFF_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << GPCPLL_DVFS0_DFS_COEFF_SHIFT)
+#define GPCPLL_DVFS0_DFS_DET_MAX_SHIFT	8
+#define GPCPLL_DVFS0_DFS_DET_MAX_WIDTH	7
+#define GPCPLL_DVFS0_DFS_DET_MAX_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_DET_MAX_WIDTH) << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT)
+
+#define GPCPLL_DVFS1		(SYS_GPCPLL_CFG_BASE + 0x14)
+#define GPCPLL_DVFS1_DFS_EXT_DET_SHIFT		0
+#define GPCPLL_DVFS1_DFS_EXT_DET_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_SHIFT		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_WIDTH		1
+#define GPCPLL_DVFS1_DFS_EXT_CAL_SHIFT		8
+#define GPCPLL_DVFS1_DFS_EXT_CAL_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_SEL_SHIFT		15
+#define GPCPLL_DVFS1_DFS_EXT_SEL_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CTRL_SHIFT		16
+#define GPCPLL_DVFS1_DFS_CTRL_WIDTH		12
+#define GPCPLL_DVFS1_EN_SDM_SHIFT		28
+#define GPCPLL_DVFS1_EN_SDM_WIDTH		1
+#define GPCPLL_DVFS1_EN_SDM_BIT			BIT(28)
+#define GPCPLL_DVFS1_EN_DFS_SHIFT		29
+#define GPCPLL_DVFS1_EN_DFS_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_BIT			BIT(29)
+#define GPCPLL_DVFS1_EN_DFS_CAL_SHIFT		30
+#define GPCPLL_DVFS1_EN_DFS_CAL_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_CAL_BIT		BIT(30)
+#define GPCPLL_DVFS1_DFS_CAL_DONE_SHIFT		31
+#define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CAL_DONE_BIT		BIT(31)
+
+#define GPC_BCAST_GPCPLL_DVFS2	(GPC_BCAST_GPCPLL_CFG_BASE + 0x20)
+#define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT	BIT(16)
+
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT	24
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH	7
+
+#define DFS_DET_RANGE	6	/* -2^6 ... 2^6-1 */
+#define SDM_DIN_RANGE	12	/* -2^12 ... 2^12-1 */
+
+struct gm20b_clk_dvfs_params {
+	s32 coeff_slope;
+	s32 coeff_offs;
+	u32 vco_ctrl;
+};
+
+static const struct gm20b_clk_dvfs_params gm20b_dvfs_params = {
+	.coeff_slope = -165230,
+	.coeff_offs = 214007,
+	.vco_ctrl = 0x7 << 3,
+};
+
+/*
+ * base.n is now the *integer* part of the N factor.
+ * sdm_din contains n's decimal part.
+ */
+struct gm20b_pll {
+	struct gk20a_pll base;
+	u32 sdm_din;
+};
+
+struct gm20b_clk_dvfs {
+	u32 dfs_coeff;
+	s32 dfs_det_max;
+	s32 dfs_ext_cal;
+};
+
+struct gm20b_clk {
+	/* currently applied parameters */
+	struct gk20a_clk base;
+	struct gm20b_clk_dvfs dvfs;
+	u32 uv;
+
+	/* new parameters to apply */
+	struct gk20a_pll new_pll;
+	struct gm20b_clk_dvfs new_dvfs;
+	u32 new_uv;
+
+	const struct gm20b_clk_dvfs_params *dvfs_params;
+
+	/* fused parameters */
+	s32 uvdet_slope;
+	s32 uvdet_offs;
+
+	/* safe frequency we can use at minimum voltage */
+	u32 safe_fmax_vmin;
+};
+#define gm20b_clk(p) container_of((gk20a_clk(p)), struct gm20b_clk, base)
+
 static u32 pl_to_div(u32 pl)
 {
 	return pl;
@@ -53,6 +156,484 @@ static const struct gk20a_clk_pllg_params gm20b_pllg_params = {
 	.min_pl = 1, .max_pl = 31,
 };
 
+static void
+gm20b_pllg_read_mnp(struct gm20b_clk *clk, struct gm20b_pll *pll)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 val;
+
+	gk20a_pllg_read_mnp(&clk->base, &pll->base);
+	val = nvkm_rd32(device, GPCPLL_CFG2);
+	pll->sdm_din = (val >> GPCPLL_CFG2_SDM_DIN_SHIFT) &
+		       MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+}
+
+static void
+gm20b_pllg_write_mnp(struct gm20b_clk *clk, const struct gm20b_pll *pll)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK,
+		  pll->sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+	gk20a_pllg_write_mnp(&clk->base, &pll->base);
+}
+
+/*
+ * Determine DFS_COEFF for the requested voltage. Always select external
+ * calibration override equal to the voltage, and set maximum detection
+ * limit "0" (to make sure that PLL output remains under F/V curve when
+ * voltage increases).
+ */
+static void
+gm20b_dvfs_calc_det_coeff(struct gm20b_clk *clk, s32 uv,
+			  struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gm20b_clk_dvfs_params *p = clk->dvfs_params;
+	u32 coeff;
+	/* Work with mv as uv would likely trigger an overflow */
+	s32 mv = DIV_ROUND_CLOSEST(uv, 1000);
+
+	/* coeff = slope * voltage + offset */
+	coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
+	coeff = DIV_ROUND_CLOSEST(coeff, 1000);
+	dvfs->dfs_coeff = min_t(u32, coeff, MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH));
+
+	dvfs->dfs_ext_cal = DIV_ROUND_CLOSEST(uv - clk->uvdet_offs,
+					     clk->uvdet_slope);
+	/* should never happen */
+	if (abs(dvfs->dfs_ext_cal) >= BIT(DFS_DET_RANGE))
+		nvkm_error(subdev, "dfs_ext_cal overflow!\n");
+
+	dvfs->dfs_det_max = 0;
+
+	nvkm_debug(subdev, "%s uv: %d coeff: %x, ext_cal: %d, det_max: %d\n",
+		   __func__, uv, dvfs->dfs_coeff, dvfs->dfs_ext_cal,
+		   dvfs->dfs_det_max);
+}
+
+/*
+ * Solve equation for integer and fractional part of the effective NDIV:
+ *
+ * n_eff = n_int + 1/2 + (SDM_DIN / 2^(SDM_DIN_RANGE + 1)) +
+ *         (DVFS_COEFF * DVFS_DET_DELTA) / 2^DFS_DET_RANGE
+ *
+ * The SDM_DIN LSB is finally shifted out, since it is not accessible by sw.
+ */
+static void
+gm20b_dvfs_calc_ndiv(struct gm20b_clk *clk, u32 n_eff, u32 *n_int, u32 *sdm_din)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gk20a_clk_pllg_params *p = clk->base.params;
+	u32 n;
+	s32 det_delta;
+	u32 rem, rem_range;
+
+	/* calculate current ext_cal and subtract previous one */
+	det_delta = DIV_ROUND_CLOSEST(((s32)clk->uv) - clk->uvdet_offs,
+				      clk->uvdet_slope);
+	det_delta -= clk->dvfs.dfs_ext_cal;
+	det_delta = min(det_delta, clk->dvfs.dfs_det_max);
+	det_delta *= clk->dvfs.dfs_coeff;
+
+	/* integer part of n */
+	n = (n_eff << DFS_DET_RANGE) - det_delta;
+	/* should never happen! */
+	if (n <= 0) {
+		nvkm_error(subdev, "ndiv <= 0 - setting to 1...\n");
+		n = 1 << DFS_DET_RANGE;
+	}
+	if (n >> DFS_DET_RANGE > p->max_n) {
+		nvkm_error(subdev, "ndiv > max_n - setting to max_n...\n");
+		n = p->max_n << DFS_DET_RANGE;
+	}
+	*n_int = n >> DFS_DET_RANGE;
+
+	/* fractional part of n */
+	rem = ((u32)n) & MASK(DFS_DET_RANGE);
+	rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
+	/* subtract 2^SDM_DIN_RANGE to account for the 1/2 of the equation */
+	rem = (rem << rem_range) - BIT(SDM_DIN_RANGE);
+	/* lose 8 LSB and clip - sdm_din only keeps the most significant byte */
+	*sdm_din = (rem >> BITS_PER_BYTE) & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+
+	nvkm_debug(subdev, "%s n_eff: %d, n_int: %d, sdm_din: %d\n", __func__,
+		   n_eff, *n_int, *sdm_din);
+}
+
+static int
+gm20b_pllg_slide(struct gm20b_clk *clk, u32 n)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll pll;
+	u32 n_int, sdm_din;
+	int ret = 0;
+
+	/* calculate the new n_int/sdm_din for this n/uv */
+	gm20b_dvfs_calc_ndiv(clk, n, &n_int, &sdm_din);
+
+	/* get old coefficients */
+	gm20b_pllg_read_mnp(clk, &pll);
+	/* do nothing if NDIV is the same */
+	if (n_int == pll.base.n && sdm_din == pll.sdm_din)
+		return 0;
+
+	/* pll slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
+
+	/* new ndiv ready for ramp */
+	/* in DVFS mode SDM is updated via "new" field */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_NEW_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT);
+	pll.base.n = n_int;
+	udelay(1);
+	gk20a_pllg_write_mnp(&clk->base, &pll.base);
+
+	/* dynamic ramp to new ndiv */
+	udelay(1);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
+
+	/* wait for ramping to complete */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
+
+	/* in DVFS mode complete SDM update */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+
+	/* exit slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) |
+		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
+	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
+
+	return ret;
+}
+
+static int
+gm20b_pllg_enable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* In DVFS mode lock cannot be used - so just delay */
+	udelay(40);
+
+	/* set SYNC_MODE for glitchless switch out of bypass */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE,
+		       GPCPLL_CFG_SYNC_MODE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
+}
+
+static void
+gm20b_pllg_disable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
+	/* clear SYNC_MODE before disabling PLL */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE, 0);
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+}
+
+static int
+gm20b_pllg_program_mnp(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll cur_pll;
+	u32 n_int, sdm_din;
+	/* if we only change pdiv, we can do a glitchless transition */
+	bool pdiv_only;
+	int ret;
+
+	gm20b_dvfs_calc_ndiv(clk, pll->n, &n_int, &sdm_din);
+	gm20b_pllg_read_mnp(clk, &cur_pll);
+	pdiv_only = cur_pll.base.n == n_int && cur_pll.sdm_din == sdm_din &&
+		    cur_pll.base.m == pll->m;
+
+	/* need full sequence if clock not enabled yet */
+	if (!gk20a_pllg_is_enabled(&clk->base))
+		pdiv_only = false;
+
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+	udelay(2);
+
+	if (pdiv_only) {
+		u32 old = cur_pll.base.pl;
+		u32 new = pll->pl;
+
+		/*
+		 * we can do a glitchless transition only if the old and new PL
+		 * parameters share at least one bit set to 1. If this is not
+		 * the case, calculate and program an interim PL that will allow
+		 * us to respect that rule.
+		 */
+		if ((old & new) == 0) {
+			cur_pll.base.pl = min(old | BIT(ffs(new) - 1),
+					      new | BIT(ffs(old) - 1));
+			gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+		}
+
+		cur_pll.base.pl = new;
+		gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+	} else {
+		/* disable before programming if more than pdiv changes */
+		gm20b_pllg_disable(clk);
+
+		cur_pll.base = *pll;
+		cur_pll.base.n = n_int;
+		cur_pll.sdm_din = sdm_din;
+		gm20b_pllg_write_mnp(clk, &cur_pll);
+
+		ret = gm20b_pllg_enable(clk);
+		if (ret)
+			return ret;
+	}
+
+	/* restore out divider 1:1 */
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+
+	return 0;
+}
+
+static int
+gm20b_pllg_program_mnp_slide(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct gk20a_pll cur_pll;
+	int ret;
+
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		gk20a_pllg_read_mnp(&clk->base, &cur_pll);
+
+		/* just do NDIV slide if there is no change to M and PL */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gm20b_pllg_slide(clk, pll->n);
+
+		/* slide down to current NDIV_LO */
+		cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+		ret = gm20b_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program MNP with the new clock parameters and new NDIV_LO */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+	ret = gm20b_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
+
+	/* slide up to new NDIV */
+	return gm20b_pllg_slide(clk, pll->n);
+}
+
+static int
+gm20b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	struct nvkm_subdev *subdev = &base->subdev;
+	struct nvkm_volt *volt = base->subdev.device->volt;
+	int ret;
+
+	ret = gk20a_pllg_calc_mnp(&clk->base, cstate->domain[nv_clk_src_gpc] *
+					     GK20A_CLK_GPC_MDIV, &clk->new_pll);
+	if (ret)
+		return ret;
+
+	clk->new_uv = volt->vid[cstate->voltage].uv;
+	gm20b_dvfs_calc_det_coeff(clk, clk->new_uv, &clk->new_dvfs);
+
+	nvkm_debug(subdev, "%s uv: %d uv\n", __func__, clk->new_uv);
+
+	return 0;
+}
+
+/*
+ * Compute PLL parameters that are always safe for the current voltage
+ */
+static void
+gm20b_dvfs_calc_safe_pll(struct gm20b_clk *clk, struct gk20a_pll *pll)
+{
+	u32 rate = gk20a_pllg_calc_rate(&clk->base, pll) / KHZ;
+	u32 parent_rate = clk->base.parent_rate / KHZ;
+	u32 nmin, nsafe;
+
+	/* remove a safe margin of 10% */
+	if (rate > clk->safe_fmax_vmin)
+		rate = rate * (100 - 10) / 100;
+
+	/* gpc2clk */
+	rate *= 2;
+
+	nmin = DIV_ROUND_UP(pll->m * clk->base.params->min_vco, parent_rate);
+	nsafe = pll->m * rate / (clk->base.parent_rate);
+
+	if (nsafe < nmin) {
+		pll->pl = DIV_ROUND_UP(nmin * parent_rate, pll->m * rate);
+		nsafe = nmin;
+	}
+
+	pll->n = nsafe;
+}
+
+static void
+gm20b_dvfs_program_coeff(struct gm20b_clk *clk, u32 coeff)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* strobe to read external DFS coefficient */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0, GPCPLL_DVFS0_DFS_COEFF_MASK,
+		  coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+}
+
+static void
+gm20b_dvfs_program_ext_cal(struct gm20b_clk *clk, u32 dfs_det_cal)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+	u32 val;
+
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, MASK(DFS_DET_RANGE + 1),
+		  dfs_det_cal);
+	udelay(1);
+
+	val = nvkm_rd32(device, GPCPLL_DVFS1);
+	if (!(val & BIT(25))) {
+		/* Use external value to overwrite calibration value */
+		val |= BIT(25) | BIT(16);
+		nvkm_wr32(device, GPCPLL_DVFS1, val);
+	}
+}
+
+static void
+gm20b_dvfs_program_dfs_detection(struct gm20b_clk *clk,
+				 struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* strobe to read external DFS coefficient */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0,
+		  GPCPLL_DVFS0_DFS_COEFF_MASK | GPCPLL_DVFS0_DFS_DET_MAX_MASK,
+		  dvfs->dfs_coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT |
+		  dvfs->dfs_det_max << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+
+	gm20b_dvfs_program_ext_cal(clk, dvfs->dfs_ext_cal);
+}
+
+static int
+gm20b_clk_prog(struct nvkm_clk *base)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	u32 cur_freq;
+	int ret;
+
+	/* No change in DVFS settings? */
+	if (clk->uv == clk->new_uv)
+		goto prog;
+
+	/*
+	 * Interim step for changing DVFS detection settings: low enough
+	 * frequency to be safe at at DVFS coeff = 0.
+	 *
+	 * 1. If voltage is increasing:
+	 * - safe frequency target matches the lowest - old - frequency
+	 * - DVFS settings are still old
+	 * - Voltage already increased to new level by volt, but maximum
+	 *   detection limit assures PLL output remains under F/V curve
+	 *
+	 * 2. If voltage is decreasing:
+	 * - safe frequency target matches the lowest - new - frequency
+	 * - DVFS settings are still old
+	 * - Voltage is also old, it will be lowered by volt afterwards
+	 *
+	 * Interim step can be skipped if old frequency is below safe minimum,
+	 * i.e., it is low enough to be safe at any voltage in operating range
+	 * with zero DVFS coefficient.
+	 */
+	cur_freq = nvkm_clk_read(&clk->base.base, nv_clk_src_gpc);
+	if (cur_freq > clk->safe_fmax_vmin) {
+		struct gk20a_pll pll_safe;
+
+		if (clk->uv < clk->new_uv)
+			/* voltage will raise: safe frequency is current one */
+			pll_safe = clk->base.pll;
+		else
+			/* voltage will drop: safe frequency is new one */
+			pll_safe = clk->new_pll;
+
+		gm20b_dvfs_calc_safe_pll(clk, &pll_safe);
+		ret = gm20b_pllg_program_mnp_slide(clk, &pll_safe);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * DVFS detection settings transition:
+	 * - Set DVFS coefficient zero
+	 * - Set calibration level to new voltage
+	 * - Set DVFS coefficient to match new voltage
+	 */
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->new_dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->new_dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->new_dvfs);
+
+prog:
+	clk->uv = clk->new_uv;
+	clk->dvfs = clk->new_dvfs;
+	clk->base.pll = clk->new_pll;
+
+	return gm20b_pllg_program_mnp_slide(clk, &clk->base.pll);
+}
+
 static struct nvkm_pstate
 gm20b_pstates[] = {
 	{
@@ -133,9 +714,99 @@ gm20b_pstates[] = {
 			.voltage = 12,
 		},
 	},
-
 };
 
+static void
+gm20b_clk_fini(struct nvkm_clk *base)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct gm20b_clk *clk = gm20b_clk(base);
+
+	/* slide to VCO min */
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		struct gk20a_pll pll;
+		u32 n_lo;
+
+		gk20a_pllg_read_mnp(&clk->base, &pll);
+		n_lo = gk20a_pllg_n_lo(&clk->base, &pll);
+		gm20b_pllg_slide(clk, n_lo);
+	}
+
+	gm20b_pllg_disable(clk);
+
+	/* set IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 1);
+}
+
+static int
+gm20b_clk_init_dvfs(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	bool fused = clk->uvdet_offs && clk->uvdet_slope;
+	static const s32 ADC_SLOPE_UV = 10000; /* default ADC detection slope */
+	u32 data;
+	int ret;
+
+	/* Enable NA DVFS */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_BIT,
+		  GPCPLL_DVFS1_EN_DFS_BIT);
+
+	/* Set VCO_CTRL */
+	if (clk->dvfs_params->vco_ctrl)
+		nvkm_mask(device, GPCPLL_CFG3, GPCPLL_CFG3_VCO_CTRL_MASK,
+		      clk->dvfs_params->vco_ctrl << GPCPLL_CFG3_VCO_CTRL_SHIFT);
+
+	if (fused) {
+		/* Start internal calibration, but ignore results */
+		nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+		/* got uvdev parameters from fuse, skip calibration */
+		goto calibrated;
+	}
+
+	/*
+	 * If calibration parameters are not fused, start internal calibration,
+	 * wait for completion, and use results along with default slope to
+	 * calculate ADC offset during boot.
+	 */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+	/* Wait for internal calibration done (spec < 2us). */
+	ret = nvkm_wait_usec(device, 10, GPCPLL_DVFS1,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT);
+	if (ret < 0) {
+		nvkm_error(subdev, "GPCPLL calibration timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	data = nvkm_rd32(device, GPCPLL_CFG3) >>
+			 GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT;
+	data &= MASK(GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH);
+
+	clk->uvdet_slope = ADC_SLOPE_UV;
+	clk->uvdet_offs = ((s32)clk->uv) - data * ADC_SLOPE_UV;
+
+	nvkm_debug(subdev, "calibrated DVFS parameters: offs %d, slope %d\n",
+		   clk->uvdet_offs, clk->uvdet_slope);
+
+calibrated:
+	/* Compute and apply initial DVFS parameters */
+	gm20b_dvfs_calc_det_coeff(clk, clk->uv, &clk->dvfs);
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->new_dvfs);
+
+	return 0;
+}
+
+/* Forward declaration to detect speedo >=1 in gm20b_clk_init() */
+static const struct nvkm_clk_func gm20b_clk;
+
 static int
 gm20b_clk_init(struct nvkm_clk *base)
 {
@@ -143,15 +814,56 @@ gm20b_clk_init(struct nvkm_clk *base)
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ret;
+	u32 data;
+
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
+		  GPC2CLK_OUT_INIT_VAL);
 
 	/* Set the global bypass control to VCO */
 	nvkm_mask(device, BYPASSCTRL_SYS,
 	       MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT,
 	       0);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
+	/* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */
+	data = nvkm_rd32(device, 0x021944);
+	if (!(data & 0x3)) {
+		data |= 0x2;
+		nvkm_wr32(device, 0x021944, data);
+
+		data = nvkm_rd32(device, 0x021948);
+		data |=  0x1;
+		nvkm_wr32(device, 0x021948, data);
+	}
+
+	/* Disable idle slow down  */
+	nvkm_mask(device, 0x20160, 0x003f0000, 0x0);
+
+	/* speedo >= 1? */
+	if (clk->base.func == &gm20b_clk) {
+		struct gm20b_clk *_clk = gm20b_clk(base);
+		struct nvkm_volt *volt = device->volt;
+
+		/* Get current voltage */
+		_clk->uv = nvkm_volt_get(volt);
+
+		/* Initialize DVFS */
+		ret = gm20b_clk_init_dvfs(_clk);
+		if (ret)
+			return ret;
+	}
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
-	ret = base->func->prog(&clk->base);
+	ret = base->func->prog(base);
 	if (ret) {
 		nvkm_error(subdev, "cannot initialize clock\n");
 		return ret;
@@ -169,6 +881,7 @@ gm20b_clk_speedo0 = {
 	.prog = gk20a_clk_prog,
 	.tidy = gk20a_clk_tidy,
 	.pstates = gm20b_pstates,
+	/* Speedo 0 only supports 12 voltages */
 	.nr_pstates = ARRAY_SIZE(gm20b_pstates) - 1,
 	.domains = {
 		{ nv_clk_src_crystal, 0xff },
@@ -177,8 +890,26 @@ gm20b_clk_speedo0 = {
 	},
 };
 
-int
-gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+static const struct nvkm_clk_func
+gm20b_clk = {
+	.init = gm20b_clk_init,
+	.fini = gm20b_clk_fini,
+	.read = gk20a_clk_read,
+	.calc = gm20b_clk_calc,
+	.prog = gm20b_clk_prog,
+	.tidy = gk20a_clk_tidy,
+	.pstates = gm20b_pstates,
+	.nr_pstates = ARRAY_SIZE(gm20b_pstates),
+	.domains = {
+		{ nv_clk_src_crystal, 0xff },
+		{ nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV },
+		{ nv_clk_src_max },
+	},
+};
+
+static int
+gm20b_clk_new_speedo0(struct nvkm_device *device, int index,
+		      struct nvkm_clk **pclk)
 {
 	struct gk20a_clk *clk;
 	int ret;
@@ -188,11 +919,156 @@ gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
-			      &gm20b_pllg_params, clk);
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
+			     &gm20b_pllg_params, clk);
 
 	clk->pl_to_div = pl_to_div;
 	clk->div_to_pl = div_to_pl;
 
 	return ret;
 }
+
+/* FUSE register */
+#define FUSE_RESERVED_CALIB0	0x204
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT	0
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH	10
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT		14
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH		10
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT		24
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH		6
+#define FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT		30
+#define FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH		2
+
+static int
+gm20b_clk_init_fused_params(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	u32 val = 0;
+	u32 rev = 0;
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA)
+	tegra_fuse_readl(FUSE_RESERVED_CALIB0, &val);
+	rev = (val >> FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT) &
+	      MASK(FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH);
+#endif
+
+	/* No fused parameters, we will calibrate later */
+	if (rev == 0)
+		return -EINVAL;
+
+	/* Integer part in mV + fractional part in uV */
+	clk->uvdet_slope = ((val >> FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH)) * 1000 +
+			((val >> FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH));
+
+	/* Integer part in mV + fractional part in 100uV */
+	clk->uvdet_offs = ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH)) * 1000 +
+			((val >> FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT) &
+			 MASK(FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH)) * 100;
+
+	nvkm_debug(subdev, "fused calibration data: slope %d, offs %d\n",
+		   clk->uvdet_slope, clk->uvdet_offs);
+	return 0;
+}
+
+static int
+gm20b_clk_init_safe_fmax(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_volt *volt = subdev->device->volt;
+	struct nvkm_pstate *pstates = clk->base.base.func->pstates;
+	int nr_pstates = clk->base.base.func->nr_pstates;
+	int vmin, id = 0;
+	u32 fmax = 0;
+	int i;
+
+	/* find lowest voltage we can use */
+	vmin = volt->vid[0].uv;
+	for (i = 1; i < volt->vid_nr; i++) {
+		if (volt->vid[i].uv <= vmin) {
+			vmin = volt->vid[i].uv;
+			id = volt->vid[i].vid;
+		}
+	}
+
+	/* find max frequency at this voltage */
+	for (i = 0; i < nr_pstates; i++)
+		if (pstates[i].base.voltage == id)
+			fmax = max(fmax,
+				   pstates[i].base.domain[nv_clk_src_gpc]);
+
+	if (!fmax) {
+		nvkm_error(subdev, "failed to evaluate safe fmax\n");
+		return -EINVAL;
+	}
+
+	/* we are safe at 90% of the max frequency */
+	clk->safe_fmax_vmin = fmax * (100 - 10) / 100;
+	nvkm_debug(subdev, "safe fmax @ vmin = %u Khz\n", clk->safe_fmax_vmin);
+
+	return 0;
+}
+
+int
+gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+{
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
+	struct gm20b_clk *clk;
+	struct nvkm_subdev *subdev;
+	struct gk20a_clk_pllg_params *clk_params;
+	int ret;
+
+	/* Speedo 0 GPUs cannot use noise-aware PLL */
+	if (tdev->gpu_speedo_id == 0)
+		return gm20b_clk_new_speedo0(device, index, pclk);
+
+	/* Speedo >= 1, use NAPLL */
+	clk = kzalloc(sizeof(*clk) + sizeof(*clk_params), GFP_KERNEL);
+	if (!clk)
+		return -ENOMEM;
+	*pclk = &clk->base.base;
+	subdev = &clk->base.base.subdev;
+
+	/* duplicate the clock parameters since we will patch them below */
+	clk_params = (void *) (clk + 1);
+	*clk_params = gm20b_pllg_params;
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk, clk_params,
+			     &clk->base);
+	if (ret)
+		return ret;
+
+	/*
+	 * NAPLL can only work with max_u, clamp the m range so
+	 * gk20a_pllg_calc_mnp always uses it
+	 */
+	clk_params->max_m = clk_params->min_m = DIV_ROUND_UP(clk_params->max_u,
+						(clk->base.parent_rate / KHZ));
+	if (clk_params->max_m == 0) {
+		nvkm_warn(subdev, "cannot use NAPLL, using legacy clock...\n");
+		kfree(clk);
+		return gm20b_clk_new_speedo0(device, index, pclk);
+	}
+
+	clk->base.pl_to_div = pl_to_div;
+	clk->base.div_to_pl = div_to_pl;
+
+	clk->dvfs_params = &gm20b_dvfs_params;
+
+	ret = gm20b_clk_init_fused_params(clk);
+	/*
+	 * we will calibrate during init - should never happen on
+	 * prod parts
+	 */
+	if (ret)
+		nvkm_warn(subdev, "no fused calibration parameters\n");
+
+	ret = gm20b_clk_init_safe_fmax(clk);
+	if (ret)
+		return ret;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 842d5de96d73..edcc157e6ac8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -24,6 +24,8 @@ nvkm-y += nvkm/subdev/fb/gk104.o
 nvkm-y += nvkm/subdev/fb/gk20a.o
 nvkm-y += nvkm/subdev/fb/gm107.o
 nvkm-y += nvkm/subdev/fb/gm200.o
+nvkm-y += nvkm/subdev/fb/gp100.o
+nvkm-y += nvkm/subdev/fb/gp104.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
@@ -41,6 +43,7 @@ nvkm-y += nvkm/subdev/fb/rammcp77.o
 nvkm-y += nvkm/subdev/fb/ramgf100.o
 nvkm-y += nvkm/subdev/fb/ramgk104.o
 nvkm-y += nvkm/subdev/fb/ramgm107.o
+nvkm-y += nvkm/subdev/fb/ramgp100.o
 nvkm-y += nvkm/subdev/fb/sddr2.o
 nvkm-y += nvkm/subdev/fb/sddr3.o
 nvkm-y += nvkm/subdev/fb/gddr3.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index ce90242b8cce..a7049c041594 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -25,6 +25,7 @@
 #include "ram.h"
 
 #include <core/memory.h>
+#include <core/option.h>
 #include <subdev/bios.h>
 #include <subdev/bios/M0203.h>
 #include <engine/gr.h>
@@ -134,6 +135,10 @@ nvkm_fb_init(struct nvkm_subdev *subdev)
 
 	if (fb->func->init)
 		fb->func->init(fb);
+	if (fb->func->init_page)
+		fb->func->init_page(fb);
+	if (fb->func->init_unkn)
+		fb->func->init_unkn(fb);
 	return 0;
 }
 
@@ -171,6 +176,7 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_fb, device, index, &fb->subdev);
 	fb->func = func;
 	fb->tile.regions = fb->func->tile.regions;
+	fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", 0);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index e649ead5ccfc..76433cc66fff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -72,6 +72,22 @@ gf100_fb_oneinit(struct nvkm_fb *fb)
 }
 
 void
+gf100_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+	switch (fb->page) {
+	case 16:
+		nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001);
+		break;
+	case 17:
+	default:
+		nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000);
+		fb->page = 17;
+		break;
+	}
+}
+
+void
 gf100_fb_init(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
@@ -79,8 +95,6 @@ gf100_fb_init(struct nvkm_fb *base)
 
 	if (fb->r100c10_page)
 		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 }
 
 void *
@@ -125,6 +139,7 @@ gf100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gf100_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
index 2160e5a39c9a..449f431644b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
@@ -14,4 +14,6 @@ int gf100_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *,
 void *gf100_fb_dtor(struct nvkm_fb *);
 void gf100_fb_init(struct nvkm_fb *);
 void gf100_fb_intr(struct nvkm_fb *);
+
+void gp100_fb_init(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
index b41f0f70038c..4245e2e6e604 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
@@ -29,6 +29,7 @@ gk104_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gk104_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
index 7306f7dfc3b9..f815fe2bbf08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
@@ -27,7 +27,6 @@ static void
 gk20a_fb_init(struct nvkm_fb *fb)
 {
 	struct nvkm_device *device = fb->subdev.device;
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->mmu_rd) >> 8);
 }
@@ -36,6 +35,7 @@ static const struct nvkm_fb_func
 gk20a_fb = {
 	.oneinit = gf100_fb_oneinit,
 	.init = gk20a_fb_init,
+	.init_page = gf100_fb_init_page,
 	.memtype_valid = gf100_fb_memtype_valid,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
index 4869fdb753c9..db699025f546 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
@@ -29,6 +29,7 @@ gm107_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 44f5716f64d8..62f653240be3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -26,6 +26,24 @@
 
 #include <core/memory.h>
 
+void
+gm200_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+	switch (fb->page) {
+	case 16:
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000001);
+		break;
+	case 17:
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000000);
+		break;
+	default:
+		nvkm_mask(device, 0x100c80, 0x00000800, 0x00000800);
+		fb->page = 0;
+		break;
+	}
+}
+
 static void
 gm200_fb_init(struct nvkm_fb *base)
 {
@@ -48,6 +66,7 @@ gm200_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gm200_fb_init,
+	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
new file mode 100644
index 000000000000..98474aec1921
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static void
+gp100_fb_init_unkn(struct nvkm_fb *base)
+{
+	struct nvkm_device *device = gf100_fb(base)->base.subdev.device;
+	nvkm_wr32(device, 0x1fac80, nvkm_rd32(device, 0x100c80));
+	nvkm_wr32(device, 0x1facc4, nvkm_rd32(device, 0x100cc4));
+	nvkm_wr32(device, 0x1facc8, nvkm_rd32(device, 0x100cc8));
+	nvkm_wr32(device, 0x1faccc, nvkm_rd32(device, 0x100ccc));
+}
+
+void
+gp100_fb_init(struct nvkm_fb *base)
+{
+	struct gf100_fb *fb = gf100_fb(base);
+	struct nvkm_device *device = fb->base.subdev.device;
+
+	if (fb->r100c10_page)
+		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
+
+	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
+	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
+	nvkm_mask(device, 0x100cc4, 0x00060000,
+		  max(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
+}
+
+static const struct nvkm_fb_func
+gp100_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page,
+	.init_unkn = gp100_fb_init_unkn,
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp100_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
new file mode 100644
index 000000000000..92cb71861bec
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static const struct nvkm_fb_func
+gp104_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page,
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp104_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp104_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index d97d640e60a0..e905d44fa1d5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -8,6 +8,8 @@ struct nvkm_fb_func {
 	void *(*dtor)(struct nvkm_fb *);
 	int (*oneinit)(struct nvkm_fb *);
 	void (*init)(struct nvkm_fb *);
+	void (*init_page)(struct nvkm_fb *);
+	void (*init_unkn)(struct nvkm_fb *);
 	void (*intr)(struct nvkm_fb *);
 
 	struct {
@@ -60,5 +62,8 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nvkm_fb_tile *);
 
 int gf100_fb_oneinit(struct nvkm_fb *);
+void gf100_fb_init_page(struct nvkm_fb *);
 bool gf100_fb_memtype_valid(struct nvkm_fb *, u32);
+
+void gm200_fb_init_page(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index f816cbf2ced3..b9ec0ae6723a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -47,4 +47,5 @@ int mcp77_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gf100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **);
+int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
new file mode 100644
index 000000000000..f3be408b5e5e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "ram.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/rammap.h>
+
+static int
+gp100_ram_init(struct nvkm_ram *ram)
+{
+	struct nvkm_subdev *subdev = &ram->fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_bios *bios = device->bios;
+	u8  ver, hdr, cnt, len, snr, ssz;
+	u32 data;
+	int i;
+
+	/* run a bunch of tables from rammap table.  there's actually
+	 * individual pointers for each rammap entry too, but, nvidia
+	 * seem to just run the last two entries' scripts early on in
+	 * their init, and never again.. we'll just run 'em all once
+	 * for now.
+	 *
+	 * i strongly suspect that each script is for a separate mode
+	 * (likely selected by 0x9a065c's lower bits?), and the
+	 * binary driver skips the one that's already been setup by
+	 * the init tables.
+	 */
+	data = nvbios_rammapTe(bios, &ver, &hdr, &cnt, &len, &snr, &ssz);
+	if (!data || hdr < 0x15)
+		return -EINVAL;
+
+	cnt  = nvbios_rd08(bios, data + 0x14); /* guess at count */
+	data = nvbios_rd32(bios, data + 0x10); /* guess u32... */
+	if (cnt) {
+		u32 save = nvkm_rd32(device, 0x9a065c) & 0x000000f0;
+		for (i = 0; i < cnt; i++, data += 4) {
+			if (i != save >> 4) {
+				nvkm_mask(device, 0x9a065c, 0x000000f0, i << 4);
+				nvbios_exec(&(struct nvbios_init) {
+						.subdev = subdev,
+						.bios = bios,
+						.offset = nvbios_rd32(bios, data),
+						.execute = 1,
+					    });
+			}
+		}
+		nvkm_mask(device, 0x9a065c, 0x000000f0, save);
+	}
+
+	nvkm_mask(device, 0x9a0584, 0x11000000, 0x00000000);
+	nvkm_wr32(device, 0x10ecc0, 0xffffffff);
+	nvkm_mask(device, 0x9a0160, 0x00000010, 0x00000010);
+	return 0;
+}
+
+static const struct nvkm_ram_func
+gp100_ram_func = {
+	.init = gp100_ram_init,
+	.get = gf100_ram_get,
+	.put = gf100_ram_put,
+};
+
+int
+gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
+{
+	struct nvkm_ram *ram;
+	struct nvkm_subdev *subdev = &fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
+	const u32 rsvd_head = ( 256 * 1024); /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
+	u32 fbpa_num = nvkm_rd32(device, 0x022438), fbpa;
+	u32 fbio_opt = nvkm_rd32(device, 0x021c14);
+	u64 part, size = 0, comm = ~0ULL;
+	bool mixed = false;
+	int ret;
+
+	nvkm_debug(subdev, "022438: %08x\n", fbpa_num);
+	nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
+	for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
+		if (!(fbio_opt & (1 << fbpa))) {
+			part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
+			nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part);
+			part = part << 20;
+			if (part != comm) {
+				if (comm != ~0ULL)
+					mixed = true;
+				comm = min(comm, part);
+			}
+			size = size + part;
+		}
+	}
+
+	ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram);
+	*pram = ram;
+	if (ret)
+		return ret;
+
+	nvkm_mm_fini(&ram->vram);
+
+	if (mixed) {
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   ((comm * fbpa_num) - rsvd_head) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+
+		ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >>
+				   NVKM_RAM_MM_SHIFT,
+				   (size - (comm * fbpa_num) - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	} else {
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   (size - rsvd_head - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index 323c79abe468..41bd5d0f7692 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -276,6 +276,8 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		struct pwr_rail_t *r = &stbl.rail[i];
 		struct nvkm_iccsense_rail *rail;
 		struct nvkm_iccsense_sensor *sensor;
+		int (*read)(struct nvkm_iccsense *,
+			    struct nvkm_iccsense_rail *);
 
 		if (!r->mode || r->resistor_mohm == 0)
 			continue;
@@ -284,31 +286,31 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		if (!sensor)
 			continue;
 
-		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
-		if (!rail)
-			return -ENOMEM;
-
 		switch (sensor->type) {
 		case NVBIOS_EXTDEV_INA209:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina209_read;
+			read = nvkm_iccsense_ina209_read;
 			break;
 		case NVBIOS_EXTDEV_INA219:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina219_read;
+			read = nvkm_iccsense_ina219_read;
 			break;
 		case NVBIOS_EXTDEV_INA3221:
 			if (r->rail >= 3)
 				continue;
-			rail->read = nvkm_iccsense_ina3221_read;
+			read = nvkm_iccsense_ina3221_read;
 			break;
 		default:
 			continue;
 		}
 
+		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
+		if (!rail)
+			return -ENOMEM;
 		sensor->rail_mask |= 1 << r->rail;
+		rail->read = read;
 		rail->sensor = sensor;
 		rail->idx = r->rail;
 		rail->mohm = r->resistor_mohm;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index 932b366598aa..12d6f4f102cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/ltc/gf100.o
 nvkm-y += nvkm/subdev/ltc/gk104.o
 nvkm-y += nvkm/subdev/ltc/gm107.o
 nvkm-y += nvkm/subdev/ltc/gm200.o
+nvkm-y += nvkm/subdev/ltc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index c9eb677967a8..4a0fa0a9b802 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -23,7 +23,6 @@
  */
 #include "priv.h"
 
-#include <core/enum.h>
 #include <subdev/fb.h>
 #include <subdev/timer.h>
 
@@ -71,7 +70,7 @@ gf100_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17ea58, depth);
 }
 
-static const struct nvkm_bitfield
+const struct nvkm_bitfield
 gf100_ltc_lts_intr_name[] = {
 	{ 0x00000001, "IDLE_ERROR_IQ" },
 	{ 0x00000002, "IDLE_ERROR_CBC" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index e292f5679418..ec0a3844b2d1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -68,18 +68,22 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17e34c, depth);
 }
 
-static void
-gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s)
+void
+gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s)
 {
 	struct nvkm_subdev *subdev = &ltc->subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 base = 0x140000 + (c * 0x2000) + (s * 0x200);
-	u32 stat = nvkm_rd32(device, base + 0x00c);
+	u32 base = 0x140400 + (c * 0x2000) + (s * 0x200);
+	u32 intr = nvkm_rd32(device, base + 0x00c);
+	u16 stat = intr & 0x0000ffff;
+	char msg[128];
 
 	if (stat) {
-		nvkm_error(subdev, "LTC%d_LTS%d: %08x\n", c, s, stat);
-		nvkm_wr32(device, base + 0x00c, stat);
+		nvkm_snprintbf(msg, sizeof(msg), gf100_ltc_lts_intr_name, stat);
+		nvkm_error(subdev, "LTC%d_LTS%d: %08x [%s]\n", c, s, intr, msg);
 	}
+
+	nvkm_wr32(device, base + 0x00c, intr);
 }
 
 void
@@ -92,7 +96,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc)
 	while (mask) {
 		u32 s, c = __ffs(mask);
 		for (s = 0; s < ltc->lts_nr; s++)
-			gm107_ltc_lts_isr(ltc, c, s);
+			gm107_ltc_intr_lts(ltc, c, s);
 		mask &= ~(1 << c);
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
index 2a29bfd5125a..e18e0dc19ec8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
@@ -46,7 +46,7 @@ static const struct nvkm_ltc_func
 gm200_ltc = {
 	.oneinit = gm200_ltc_oneinit,
 	.init = gm200_ltc_init,
-	.intr = gm107_ltc_intr, /*XXX: not validated */
+	.intr = gm107_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
 	.zbc = 16,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
new file mode 100644
index 000000000000..0bdfb2f40266
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+
+static void
+gp100_ltc_intr(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	u32 mask;
+
+	mask = nvkm_rd32(device, 0x0001c0);
+	while (mask) {
+		u32 s, c = __ffs(mask);
+		for (s = 0; s < ltc->lts_nr; s++)
+			gm107_ltc_intr_lts(ltc, c, s);
+		mask &= ~(1 << c);
+	}
+}
+
+static int
+gp100_ltc_oneinit(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	ltc->ltc_nr = nvkm_rd32(device, 0x12006c);
+	ltc->lts_nr = nvkm_rd32(device, 0x17e280) >> 28;
+	/*XXX: tagram allocation - TBD */
+	return nvkm_mm_init(&ltc->tags, 0, 0, 1);
+}
+
+static void
+gp100_ltc_init(struct nvkm_ltc *ltc)
+{
+	/*XXX: PMU LS call to setup tagram address */
+}
+
+static const struct nvkm_ltc_func
+gp100_ltc = {
+	.oneinit = gp100_ltc_oneinit,
+	.init = gp100_ltc_init,
+	.intr = gp100_ltc_intr,
+	.cbc_clear = gm107_ltc_cbc_clear,
+	.cbc_wait = gm107_ltc_cbc_wait,
+	.zbc = 16,
+	.zbc_clear_color = gm107_ltc_zbc_clear_color,
+	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
+};
+
+int
+gp100_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
+{
+	return nvkm_ltc_new_(&gp100_ltc, device, index, pltc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 6d81c695ed0d..8b95f96e3ffa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -2,6 +2,7 @@
 #define __NVKM_LTC_PRIV_H__
 #define nvkm_ltc(p) container_of((p), struct nvkm_ltc, subdev)
 #include <subdev/ltc.h>
+#include <core/enum.h>
 
 int nvkm_ltc_new_(const struct nvkm_ltc_func *, struct nvkm_device *,
 		  int index, struct nvkm_ltc **);
@@ -31,8 +32,10 @@ void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
 void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
 void gf100_ltc_invalidate(struct nvkm_ltc *);
 void gf100_ltc_flush(struct nvkm_ltc *);
+extern const struct nvkm_bitfield gf100_ltc_lts_intr_name[];
 
 void gm107_ltc_intr(struct nvkm_ltc *);
+void gm107_ltc_intr_lts(struct nvkm_ltc *, int ltc, int lts);
 void gm107_ltc_cbc_clear(struct nvkm_ltc *, u32, u32);
 void gm107_ltc_cbc_wait(struct nvkm_ltc *);
 void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index 49695ac7be2e..12943f92c206 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/mc/gt215.o
 nvkm-y += nvkm/subdev/mc/gf100.o
 nvkm-y += nvkm/subdev/mc/gk104.o
 nvkm-y += nvkm/subdev/mc/gk20a.o
+nvkm-y += nvkm/subdev/mc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 350a8caa84c8..6b25e25f9eba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -27,43 +27,67 @@
 #include <subdev/top.h>
 
 void
-nvkm_mc_unk260(struct nvkm_mc *mc, u32 data)
+nvkm_mc_unk260(struct nvkm_device *device, u32 data)
 {
-	if (mc->func->unk260)
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc) && mc->func->unk260)
 		mc->func->unk260(mc, data);
 }
 
 void
-nvkm_mc_intr_unarm(struct nvkm_mc *mc)
+nvkm_mc_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx, bool en)
 {
-	return mc->func->intr_unarm(mc);
+	struct nvkm_mc *mc = device->mc;
+	const struct nvkm_mc_map *map;
+	if (likely(mc) && mc->func->intr_mask) {
+		u32 mask = nvkm_top_intr_mask(device, devidx);
+		for (map = mc->func->intr; !mask && map->stat; map++) {
+			if (map->unit == devidx)
+				mask = map->stat;
+		}
+		mc->func->intr_mask(mc, mask, en ? mask : 0);
+	}
+}
+
+void
+nvkm_mc_intr_unarm(struct nvkm_device *device)
+{
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc))
+		mc->func->intr_unarm(mc);
 }
 
 void
-nvkm_mc_intr_rearm(struct nvkm_mc *mc)
+nvkm_mc_intr_rearm(struct nvkm_device *device)
 {
-	return mc->func->intr_rearm(mc);
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc))
+		mc->func->intr_rearm(mc);
 }
 
 static u32
-nvkm_mc_intr_mask(struct nvkm_mc *mc)
+nvkm_mc_intr_stat(struct nvkm_mc *mc)
 {
-	u32 intr = mc->func->intr_mask(mc);
+	u32 intr = mc->func->intr_stat(mc);
 	if (WARN_ON_ONCE(intr == 0xffffffff))
 		intr = 0; /* likely fallen off the bus */
 	return intr;
 }
 
 void
-nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
+nvkm_mc_intr(struct nvkm_device *device, bool *handled)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	struct nvkm_subdev *subdev;
-	const struct nvkm_mc_map *map = mc->func->intr;
-	u32 stat, intr = nvkm_mc_intr_mask(mc);
+	const struct nvkm_mc_map *map;
+	u32 stat, intr;
 	u64 subdevs;
 
-	stat = nvkm_top_intr(device->top, intr, &subdevs);
+	if (unlikely(!mc))
+		return;
+
+	intr = nvkm_mc_intr_stat(mc);
+	stat = nvkm_top_intr(device, intr, &subdevs);
 	while (subdevs) {
 		enum nvkm_devidx subidx = __ffs64(subdevs);
 		subdev = nvkm_device_subdev(device, subidx);
@@ -72,14 +96,13 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 		subdevs &= ~BIT_ULL(subidx);
 	}
 
-	while (map->stat) {
+	for (map = mc->func->intr; map->stat; map++) {
 		if (intr & map->stat) {
 			subdev = nvkm_device_subdev(device, map->unit);
 			if (subdev)
 				nvkm_subdev_intr(subdev);
 			stat &= ~map->stat;
 		}
-		map++;
 	}
 
 	if (stat)
@@ -87,22 +110,32 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 	*handled = intr != 0;
 }
 
-static void
-nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+static u32
+nvkm_mc_reset_mask(struct nvkm_device *device, bool isauto,
+		   enum nvkm_devidx devidx)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	const struct nvkm_mc_map *map;
-	u64 pmc_enable;
-
-	if (!(pmc_enable = nvkm_top_reset(device->top, devidx))) {
-		for (map = mc->func->reset; map && map->stat; map++) {
-			if (map->unit == devidx) {
-				pmc_enable = map->stat;
-				break;
+	u64 pmc_enable = 0;
+	if (likely(mc)) {
+		if (!(pmc_enable = nvkm_top_reset(device, devidx))) {
+			for (map = mc->func->reset; map && map->stat; map++) {
+				if (!isauto || !map->noauto) {
+					if (map->unit == devidx) {
+						pmc_enable = map->stat;
+						break;
+					}
+				}
 			}
 		}
 	}
+	return pmc_enable;
+}
 
+void
+nvkm_mc_reset(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, true, devidx);
 	if (pmc_enable) {
 		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
 		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
@@ -111,17 +144,27 @@ nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
 }
 
 void
-nvkm_mc_reset(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+nvkm_mc_disable(struct nvkm_device *device, enum nvkm_devidx devidx)
 {
-	if (likely(mc))
-		nvkm_mc_reset_(mc, devidx);
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+	if (pmc_enable)
+		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
+}
+
+void
+nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+	if (pmc_enable) {
+		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
+		nvkm_rd32(device, 0x000200);
+	}
 }
 
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
-	struct nvkm_mc *mc = nvkm_mc(subdev);
-	nvkm_mc_intr_unarm(mc);
+	nvkm_mc_intr_unarm(subdev->device);
 	return 0;
 }
 
@@ -131,7 +174,7 @@ nvkm_mc_init(struct nvkm_subdev *subdev)
 	struct nvkm_mc *mc = nvkm_mc(subdev);
 	if (mc->func->init)
 		mc->func->init(mc);
-	nvkm_mc_intr_rearm(mc);
+	nvkm_mc_intr_rearm(subdev->device);
 	return 0;
 }
 
@@ -148,16 +191,21 @@ nvkm_mc = {
 	.fini = nvkm_mc_fini,
 };
 
+void
+nvkm_mc_ctor(const struct nvkm_mc_func *func, struct nvkm_device *device,
+	     int index, struct nvkm_mc *mc)
+{
+	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev);
+	mc->func = func;
+}
+
 int
 nvkm_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
 	     int index, struct nvkm_mc **pmc)
 {
 	struct nvkm_mc *mc;
-
 	if (!(mc = *pmc = kzalloc(sizeof(*mc), GFP_KERNEL)))
 		return -ENOMEM;
-
-	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev);
-	mc->func = func;
+	nvkm_mc_ctor(func, device, index, *pmc);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
index 5c85b47f071d..c3d66ef5dc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
@@ -57,7 +57,7 @@ g84_mc = {
 	.intr = g84_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g84_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
index 0280b43cc10c..93ad4982ce5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
@@ -57,7 +57,7 @@ g98_mc = {
 	.intr = g98_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g98_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
index 8397e223bd43..d2c4d6033abb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
@@ -76,7 +76,7 @@ gf100_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-gf100_mc_intr_mask(struct nvkm_mc *mc)
+gf100_mc_intr_stat(struct nvkm_mc *mc)
 {
 	struct nvkm_device *device = mc->subdev.device;
 	u32 intr0 = nvkm_rd32(device, 0x000100);
@@ -85,6 +85,14 @@ gf100_mc_intr_mask(struct nvkm_mc *mc)
 }
 
 void
+gf100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
+{
+	struct nvkm_device *device = mc->subdev.device;
+	nvkm_mask(device, 0x000640, mask, stat);
+	nvkm_mask(device, 0x000644, mask, stat);
+}
+
+void
 gf100_mc_unk260(struct nvkm_mc *mc, u32 data)
 {
 	nvkm_wr32(mc->subdev.device, 0x000260, data);
@@ -97,6 +105,7 @@ gf100_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gf100_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
index 317464212c7d..7b8c6ecad1a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
@@ -26,6 +26,7 @@
 const struct nvkm_mc_map
 gk104_mc_reset[] = {
 	{ 0x00000100, NVKM_ENGINE_FIFO },
+	{ 0x00002000, NVKM_SUBDEV_PMU, true },
 	{}
 };
 
@@ -53,6 +54,7 @@ gk104_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
index 60b044f517ed..ca1bf3279dbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
@@ -30,6 +30,7 @@ gk20a_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
new file mode 100644
index 000000000000..4d22f4abd6de
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#define gp100_mc(p) container_of((p), struct gp100_mc, base)
+#include "priv.h"
+
+struct gp100_mc {
+	struct nvkm_mc base;
+	spinlock_t lock;
+	bool intr;
+	u32 mask;
+};
+
+static void
+gp100_mc_intr_update(struct gp100_mc *mc)
+{
+	struct nvkm_device *device = mc->base.subdev.device;
+	u32 mask = mc->intr ? mc->mask : 0, i;
+	for (i = 0; i < 2; i++) {
+		nvkm_wr32(device, 0x000180 + (i * 0x04), ~mask);
+		nvkm_wr32(device, 0x000160 + (i * 0x04),  mask);
+	}
+}
+
+static void
+gp100_mc_intr_unarm(struct nvkm_mc *base)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = false;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_rearm(struct nvkm_mc *base)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = true;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->mask = (mc->mask & ~mask) | intr;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static const struct nvkm_mc_func
+gp100_mc = {
+	.init = nv50_mc_init,
+	.intr = gk104_mc_intr,
+	.intr_unarm = gp100_mc_intr_unarm,
+	.intr_rearm = gp100_mc_intr_rearm,
+	.intr_mask = gp100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
+	.reset = gk104_mc_reset,
+};
+
+int
+gp100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc)
+{
+	struct gp100_mc *mc;
+
+	if (!(mc = kzalloc(sizeof(*mc), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_mc_ctor(&gp100_mc, device, index, &mc->base);
+	*pmc = &mc->base;
+
+	spin_lock_init(&mc->lock);
+	mc->intr = false;
+	mc->mask = 0x7fffffff;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
index aad0ba95bf18..99d50a3d956f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
@@ -53,13 +53,20 @@ gt215_mc_intr[] = {
 	{},
 };
 
+static void
+gt215_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
+{
+	nvkm_mask(mc->subdev.device, 0x000640, mask, stat);
+}
+
 static const struct nvkm_mc_func
 gt215_mc = {
 	.init = nv50_mc_init,
 	.intr = gt215_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_mask = gt215_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = gt215_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
index a062624e906b..6509defd1460 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
@@ -56,7 +56,7 @@ nv04_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-nv04_mc_intr_mask(struct nvkm_mc *mc)
+nv04_mc_intr_stat(struct nvkm_mc *mc)
 {
 	return nvkm_rd32(mc->subdev.device, 0x000100);
 }
@@ -75,7 +75,7 @@ nv04_mc = {
 	.intr = nv04_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
index 55f0b9166b52..9213107901e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
@@ -39,7 +39,7 @@ nv11_mc = {
 	.intr = nv11_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
index c40fa67f79a5..64bf5bbf8146 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
@@ -48,7 +48,7 @@ nv17_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
index cc56271db564..65fa44a64b98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
@@ -43,7 +43,7 @@ nv44_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
index 343b6078580d..fe93b4fd7100 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
@@ -50,7 +50,7 @@ nv50_mc = {
 	.intr = nv50_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index a12038118512..4f0576a06d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -3,12 +3,15 @@
 #define nvkm_mc(p) container_of((p), struct nvkm_mc, subdev)
 #include <subdev/mc.h>
 
+void nvkm_mc_ctor(const struct nvkm_mc_func *, struct nvkm_device *,
+		  int index, struct nvkm_mc *);
 int nvkm_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *,
 		 int index, struct nvkm_mc **);
 
 struct nvkm_mc_map {
 	u32 stat;
 	u32 unit;
+	bool noauto;
 };
 
 struct nvkm_mc_func {
@@ -18,8 +21,10 @@ struct nvkm_mc_func {
 	void (*intr_unarm)(struct nvkm_mc *);
 	/* enable reporting of interrupts to host */
 	void (*intr_rearm)(struct nvkm_mc *);
+	/* (un)mask delivery of specific interrupts */
+	void (*intr_mask)(struct nvkm_mc *, u32 mask, u32 stat);
 	/* retrieve pending interrupt mask (NV_PMC_INTR) */
-	u32 (*intr_mask)(struct nvkm_mc *);
+	u32 (*intr_stat)(struct nvkm_mc *);
 	const struct nvkm_mc_map *reset;
 	void (*unk260)(struct nvkm_mc *, u32);
 };
@@ -27,7 +32,7 @@ struct nvkm_mc_func {
 void nv04_mc_init(struct nvkm_mc *);
 void nv04_mc_intr_unarm(struct nvkm_mc *);
 void nv04_mc_intr_rearm(struct nvkm_mc *);
-u32 nv04_mc_intr_mask(struct nvkm_mc *);
+u32 nv04_mc_intr_stat(struct nvkm_mc *);
 extern const struct nvkm_mc_map nv04_mc_reset[];
 
 extern const struct nvkm_mc_map nv17_mc_intr[];
@@ -39,7 +44,8 @@ void nv50_mc_init(struct nvkm_mc *);
 
 void gf100_mc_intr_unarm(struct nvkm_mc *);
 void gf100_mc_intr_rearm(struct nvkm_mc *);
-u32 gf100_mc_intr_mask(struct nvkm_mc *);
+void gf100_mc_intr_mask(struct nvkm_mc *, u32, u32);
+u32 gf100_mc_intr_stat(struct nvkm_mc *);
 void gf100_mc_unk260(struct nvkm_mc *, u32);
 
 extern const struct nvkm_mc_map gk104_mc_intr[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 3c2519fdeb81..2a31b7d66a6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
 nvkm-y += nvkm/subdev/pci/gk104.o
+nvkm-y += nvkm/subdev/pci/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index 6b0328bd7eed..eb9b278198b2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -69,15 +69,13 @@ static irqreturn_t
 nvkm_pci_intr(int irq, void *arg)
 {
 	struct nvkm_pci *pci = arg;
-	struct nvkm_mc *mc = pci->subdev.device->mc;
+	struct nvkm_device *device = pci->subdev.device;
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		if (pci->msi)
-			pci->func->msi_rearm(pci);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	if (pci->msi)
+		pci->func->msi_rearm(pci);
+	nvkm_mc_intr(device, &handled);
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
new file mode 100644
index 000000000000..82c5234a06ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static void
+gp100_pci_msi_rearm(struct nvkm_pci *pci)
+{
+	nvkm_pci_wr32(pci, 0x0704, 0x00000000);
+}
+
+static const struct nvkm_pci_func
+gp100_pci_func = {
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = gp100_pci_msi_rearm,
+};
+
+int
+gp100_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&gp100_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 213fdba6cfa0..314be2192b7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,8 +19,9 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-
 #include "priv.h"
+
+#include <subdev/mc.h>
 #include <subdev/timer.h>
 
 static const char *
@@ -70,12 +71,11 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 	int ret;
 
 	/* enable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, sb->enable_mask);
-	nvkm_rd32(device, 0x200);
+	nvkm_mc_enable(device, sb->devidx);
 	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
 	if (ret < 0) {
-		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
 		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
+		nvkm_mc_disable(device, sb->devidx);
 		return ret;
 	}
 
@@ -85,8 +85,7 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 
 	/* enable IRQs */
 	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mask(device, 0x640, sb->irq_mask, sb->irq_mask);
-	nvkm_mask(device, 0x644, sb->irq_mask, sb->irq_mask);
+	nvkm_mc_intr_mask(device, sb->devidx, true);
 
 	return 0;
 }
@@ -97,14 +96,13 @@ nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
 	struct nvkm_device *device = sb->subdev.device;
 
 	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mask(device, 0x644, sb->irq_mask, 0x0);
-	nvkm_mask(device, 0x640, sb->irq_mask, 0x0);
+	nvkm_mc_intr_mask(device, sb->devidx, false);
 	nvkm_wr32(device, sb->base + 0x014, 0xff);
 
 	falcon_wait_idle(device, sb->base);
 
 	/* disable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+	nvkm_mc_disable(device, sb->devidx);
 
 	return 0;
 }
@@ -216,14 +214,7 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 		return ret;
 	}
 
-	/*
-	 * Build all blobs - the same blobs can be used to perform secure boot
-	 * multiple times
-	 */
-	if (sb->func->prepare_blobs)
-		ret = sb->func->prepare_blobs(sb);
-
-	return ret;
+	return 0;
 }
 
 static int
@@ -270,9 +261,8 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 	/* setup the performing falcon's base address and masks */
 	switch (func->boot_falcon) {
 	case NVKM_SECBOOT_FALCON_PMU:
+		sb->devidx = NVKM_SUBDEV_PMU;
 		sb->base = 0x10a000;
-		sb->irq_mask = 0x1000000;
-		sb->enable_mask = 0x2000;
 		break;
 	default:
 		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index cc100dc940ea..f1e2dc914366 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -860,6 +860,8 @@ gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
 
 	/* Write LS blob */
 	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
+	if (ret)
+		nvkm_gpuobj_del(&gsb->ls_blob);
 
 cleanup:
 	ls_ucode_mgr_cleanup(&mgr);
@@ -1023,29 +1025,34 @@ gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
 	int ret;
 
 	/* Load and prepare the managed falcon's firmwares */
-	ret = gm200_secboot_prepare_ls_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->ls_blob) {
+		ret = gm200_secboot_prepare_ls_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware that will load the LS firmwares */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-					    &gsb->acr_load_blob,
-					    &gsb->acr_load_bl_desc, true);
-	if (ret)
-		return ret;
+	if (!gsb->acr_load_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
+						&gsb->acr_load_blob,
+						&gsb->acr_load_bl_desc, true);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware bootloader */
-	ret = gm200_secboot_prepare_hsbl_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->hsbl_blob) {
+		ret = gm200_secboot_prepare_hsbl_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
 static int
-gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
+gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
 	ret = gm20x_secboot_prepare_blobs(gsb);
@@ -1053,15 +1060,37 @@ gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
 		return ret;
 
 	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					    &gsb->acr_unload_blob,
-					    &gsb->acr_unload_bl_desc, false);
-	if (ret)
-		return ret;
+	if (!gsb->acr_unload_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
+					       &gsb->acr_unload_blob,
+					       &gsb->acr_unload_bl_desc, false);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
+static int
+gm200_secboot_blobs_ready(struct gm200_secboot *gsb)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int ret;
+
+	/* firmware already loaded, nothing to do... */
+	if (gsb->firmware_ok)
+		return 0;
+
+	ret = gsb->func->prepare_blobs(gsb);
+	if (ret) {
+		nvkm_error(subdev, "failed to load secure firmware\n");
+		return ret;
+	}
+
+	gsb->firmware_ok = true;
+
+	return 0;
+}
 
 
 /*
@@ -1234,6 +1263,11 @@ gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
+	/* Make sure all blobs are ready */
+	ret = gm200_secboot_blobs_ready(gsb);
+	if (ret)
+		return ret;
+
 	/*
 	 * Dummy GM200 implementation: perform secure boot each time we are
 	 * called on FECS. Since only FECS and GPCCS are managed and started
@@ -1373,7 +1407,6 @@ gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm200_secboot_init,
 	.fini = gm200_secboot_fini,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
@@ -1415,6 +1448,7 @@ gm200_secboot_func = {
 	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
 	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
+	.prepare_blobs = gm200_secboot_prepare_blobs,
 };
 
 int
@@ -1487,3 +1521,19 @@ MODULE_FIRMWARE("nvidia/gm206/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/gp100/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_method_init.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index 684320484b70..d5395ebfe8d3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -42,6 +42,32 @@ struct gm20b_flcn_bl_desc {
 	u32 data_size;
 };
 
+static int
+gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int acr_size;
+	int ret;
+
+	ret = gm20x_secboot_prepare_blobs(gsb);
+	if (ret)
+		return ret;
+
+	acr_size = gsb->acr_load_blob->size;
+	/*
+	 * On Tegra the WPR region is set by the bootloader. It is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	if (acr_size > gsb->wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", acr_size);
+		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 /**
  * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
  *
@@ -88,6 +114,7 @@ gm20b_secboot_func = {
 	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
 	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
+	.prepare_blobs = gm20b_secboot_prepare_blobs,
 };
 
 
@@ -147,32 +174,6 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_prepare_blobs(struct nvkm_secboot *sb)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(&sb->subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(&sb->subdev, "required: %dB\n", acr_size);
-		nvkm_error(&sb->subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-static int
 gm20b_secboot_init(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
@@ -189,7 +190,6 @@ static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm20b_secboot_init,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index f2b09dee7c5d..a9a8a0e1017e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -30,7 +30,6 @@ struct nvkm_secboot_func {
 	int (*init)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*prepare_blobs)(struct nvkm_secboot *);
 	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
@@ -147,10 +146,8 @@ struct hsflcn_acr_desc {
  * @inst:		instance block for HS falcon
  * @pgd:		page directory for the HS falcon
  * @vm:			address space used by the HS falcon
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
+ * @falcon_state:	current state of the managed falcons
+ * @firmware_ok:	whether the firmware blobs have been created
  */
 struct gm200_secboot {
 	struct nvkm_secboot base;
@@ -196,9 +193,19 @@ struct gm200_secboot {
 		RUNNING,
 	} falcon_state[NVKM_SECBOOT_FALCON_END];
 
+	bool firmware_ok;
 };
 #define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
 
+/**
+ * Contains functions we wish to abstract between GM200-like implementations
+ * @bl_desc_size:	size of the BL descriptor used by this chip.
+ * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
+ *			the generic GM200 format into a data array of size
+ *			bl_desc_size
+ * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
+ * @prepare_blobs:	prepares the various blobs needed for secure booting
+ */
 struct gm200_secboot_func {
 	/*
 	 * Size of the bootloader descriptor for this chip. A block of this
@@ -214,6 +221,7 @@ struct gm200_secboot_func {
 	 * we want the HS FW to set up.
 	 */
 	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
+	int (*prepare_blobs)(struct gm200_secboot *);
 };
 
 int gm200_secboot_init(struct nvkm_secboot *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index a1b264664aad..fe063d5728e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -41,8 +41,9 @@ nvkm_top_device_new(struct nvkm_top *top)
 }
 
 u32
-nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
+nvkm_top_reset(struct nvkm_device *device, enum nvkm_devidx index)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	if (top) {
@@ -56,8 +57,25 @@ nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
 }
 
 u32
-nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
+nvkm_top_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx)
 {
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	if (top) {
+		list_for_each_entry(info, &top->device, head) {
+			if (info->index == devidx && info->intr >= 0)
+				return BIT(info->intr);
+		}
+	}
+
+	return 0;
+}
+
+u32
+nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
+{
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	u64 subdevs = 0;
 	u32 handled = 0;
@@ -78,8 +96,9 @@ nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
 }
 
 enum nvkm_devidx
-nvkm_top_fault(struct nvkm_top *top, int fault)
+nvkm_top_fault(struct nvkm_device *device, int fault)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	list_for_each_entry(info, &top->device, head) {
@@ -91,8 +110,9 @@ nvkm_top_fault(struct nvkm_top *top, int fault)
 }
 
 enum nvkm_devidx
-nvkm_top_engine(struct nvkm_top *top, int index, int *runl, int *engn)
+nvkm_top_engine(struct nvkm_device *device, int index, int *runl, int *engn)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	int n = 0;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index e06acc340e99..efac3402f9dd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -29,7 +29,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 	struct nvkm_subdev *subdev = &top->subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_top_device *info = NULL;
-	u32 data, type;
+	u32 data, type, inst;
 	int i;
 
 	for (i = 0; i < 64; i++) {
@@ -37,6 +37,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 			if (!(info = nvkm_top_device_new(top)))
 				return -ENOMEM;
 			type = ~0;
+			inst = 0;
 		}
 
 		data = nvkm_rd32(device, 0x022700 + (i * 0x04));
@@ -45,6 +46,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 		case 0x00000000: /* NOT_VALID */
 			continue;
 		case 0x00000001: /* DATA */
+			inst        = (data & 0x3c000000) >> 26;
 			info->addr  = (data & 0x00fff000);
 			info->fault = (data & 0x000000f8) >> 3;
 			break;
@@ -67,27 +69,32 @@ gk104_top_oneinit(struct nvkm_top *top)
 			continue;
 
 		/* Translate engine type to NVKM engine identifier. */
+#define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A
+#define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1)      \
+		info->index = NVKM_ENGINE_##A##0 + inst
 		switch (type) {
-		case 0x00000000: info->index = NVKM_ENGINE_GR; break;
-		case 0x00000001: info->index = NVKM_ENGINE_CE0; break;
-		case 0x00000002: info->index = NVKM_ENGINE_CE1; break;
-		case 0x00000003: info->index = NVKM_ENGINE_CE2; break;
-		case 0x00000008: info->index = NVKM_ENGINE_MSPDEC; break;
-		case 0x00000009: info->index = NVKM_ENGINE_MSPPP; break;
-		case 0x0000000a: info->index = NVKM_ENGINE_MSVLD; break;
-		case 0x0000000b: info->index = NVKM_ENGINE_MSENC; break;
-		case 0x0000000c: info->index = NVKM_ENGINE_VIC; break;
-		case 0x0000000d: info->index = NVKM_ENGINE_SEC; break;
-		case 0x0000000e: info->index = NVKM_ENGINE_NVENC0; break;
-		case 0x0000000f: info->index = NVKM_ENGINE_NVENC1; break;
-		case 0x00000010: info->index = NVKM_ENGINE_NVDEC; break;
+		case 0x00000000: A_(GR    ); break;
+		case 0x00000001: A_(CE0   ); break;
+		case 0x00000002: A_(CE1   ); break;
+		case 0x00000003: A_(CE2   ); break;
+		case 0x00000008: A_(MSPDEC); break;
+		case 0x00000009: A_(MSPPP ); break;
+		case 0x0000000a: A_(MSVLD ); break;
+		case 0x0000000b: A_(MSENC ); break;
+		case 0x0000000c: A_(VIC   ); break;
+		case 0x0000000d: A_(SEC   ); break;
+		case 0x0000000e: B_(NVENC ); break;
+		case 0x0000000f: A_(NVENC1); break;
+		case 0x00000010: A_(NVDEC ); break;
+		case 0x00000013: B_(CE    ); break;
 			break;
 		default:
 			break;
 		}
 
-		nvkm_debug(subdev, "%02x (%8s): addr %06x fault %2d engine %2d "
-				   "runlist %2d intr %2d reset %2d\n", type,
+		nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d "
+				   "engine %2d runlist %2d intr %2d "
+				   "reset %2d\n", type, inst,
 			   info->index == NVKM_SUBDEV_NR ? NULL :
 					  nvkm_subdev_name[info->index],
 			   info->addr, info->fault, info->engine, info->runlist,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
index 6b2d7531a7ff..1c3d23b0e84a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
@@ -120,6 +120,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 
 	data = nvbios_volt_parse(bios, &ver, &hdr, &cnt, &len, &info);
 	if (data && info.vidmask && info.base && info.step) {
+		volt->min_uv = info.min;
+		volt->max_uv = info.max;
 		for (i = 0; i < info.vidmask + 1; i++) {
 			if (info.base >= info.min &&
 				info.base <= info.max) {
@@ -131,6 +133,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 		}
 		volt->vid_mask = info.vidmask;
 	} else if (data && info.vidmask) {
+		volt->min_uv = 0xffffffff;
+		volt->max_uv = 0;
 		for (i = 0; i < cnt; i++) {
 			data = nvbios_volt_entry_parse(bios, i, &ver, &hdr,
 						       &ivid);
@@ -138,9 +142,14 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 				volt->vid[volt->vid_nr].uv = ivid.voltage;
 				volt->vid[volt->vid_nr].vid = ivid.vid;
 				volt->vid_nr++;
+				volt->min_uv = min(volt->min_uv, ivid.voltage);
+				volt->max_uv = max(volt->max_uv, ivid.voltage);
 			}
 		}
 		volt->vid_mask = info.vidmask;
+	} else if (data && info.type == NVBIOS_VOLT_PWM) {
+		volt->min_uv = info.base;
+		volt->max_uv = info.base + info.pwm_range;
 	}
 }
 
@@ -181,8 +190,11 @@ nvkm_volt_ctor(const struct nvkm_volt_func *func, struct nvkm_device *device,
 	volt->func = func;
 
 	/* Assuming the non-bios device should build the voltage table later */
-	if (bios)
+	if (bios) {
 		nvkm_volt_parse_bios(bios, volt);
+		nvkm_debug(&volt->subdev, "min: %iuv max: %iuv\n",
+			   volt->min_uv, volt->max_uv);
+	}
 
 	if (volt->vid_nr) {
 		for (i = 0; i < volt->vid_nr; i++) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
index d554455326da..ce5d83cdc7cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
@@ -77,18 +77,19 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale,
 	return mv;
 }
 
-int
+static int
 gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo)
 {
+	static const int v_scale = 1000;
 	int mv;
 
 	mv = gk20a_volt_get_cvb_t_voltage(speedo, -10, 100, 10, coef);
-	mv = DIV_ROUND_UP(mv, 1000);
+	mv = DIV_ROUND_UP(mv, v_scale);
 
 	return mv * 1000;
 }
 
-int
+static int
 gk20a_volt_vid_get(struct nvkm_volt *base)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -103,7 +104,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base)
 	return -EINVAL;
 }
 
-int
+static int
 gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -113,7 +114,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 	return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000);
 }
 
-int
+static int
 gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -143,9 +144,9 @@ gk20a_volt = {
 };
 
 int
-_gk20a_volt_ctor(struct nvkm_device *device, int index,
-		 const struct cvb_coef *coefs, int nb_coefs,
-		 struct gk20a_volt *volt)
+gk20a_volt_ctor(struct nvkm_device *device, int index,
+		const struct cvb_coef *coefs, int nb_coefs,
+		int vmin, struct gk20a_volt *volt)
 {
 	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	int i, uv;
@@ -160,9 +161,9 @@ _gk20a_volt_ctor(struct nvkm_device *device, int index,
 	volt->base.vid_nr = nb_coefs;
 	for (i = 0; i < volt->base.vid_nr; i++) {
 		volt->base.vid[i].vid = i;
-		volt->base.vid[i].uv =
-			gk20a_volt_calc_voltage(&coefs[i],
-						tdev->gpu_speedo);
+		volt->base.vid[i].uv = max(
+			gk20a_volt_calc_voltage(&coefs[i], tdev->gpu_speedo),
+			vmin);
 		nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i,
 			   volt->base.vid[i].vid, volt->base.vid[i].uv);
 	}
@@ -180,6 +181,6 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gk20a_cvb_coef,
-				ARRAY_SIZE(gk20a_cvb_coef), volt);
+	return gk20a_volt_ctor(device, index, gk20a_cvb_coef,
+			       ARRAY_SIZE(gk20a_cvb_coef), 0, volt);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
index 0fa3b502bcf8..6a6c97f9684e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
@@ -37,13 +37,8 @@ struct gk20a_volt {
 	struct regulator *vdd;
 };
 
-int _gk20a_volt_ctor(struct nvkm_device *device, int index,
-		     const struct cvb_coef *coefs, int nb_coefs,
-		     struct gk20a_volt *volt);
-
-int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo);
-int gk20a_volt_vid_get(struct nvkm_volt *volt);
-int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid);
-int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition);
+int gk20a_volt_ctor(struct nvkm_device *device, int index,
+		    const struct cvb_coef *coefs, int nb_coefs,
+		    int vmin, struct gk20a_volt *volt);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
index 49b5ecb701e4..74db4d28930f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
@@ -41,16 +41,52 @@ const struct cvb_coef gm20b_cvb_coef[] = {
 	/* 921600 */ { 2647676, -106455, 1632 },
 };
 
+static const struct cvb_coef gm20b_na_cvb_coef[] = {
+	/* KHz,         c0,     c1,   c2,    c3,     c4,   c5 */
+	/*  76800 */ {  814294, 8144, -940, 808, -21583, 226 },
+	/* 153600 */ {  856185, 8144, -940, 808, -21583, 226 },
+	/* 230400 */ {  898077, 8144, -940, 808, -21583, 226 },
+	/* 307200 */ {  939968, 8144, -940, 808, -21583, 226 },
+	/* 384000 */ {  981860, 8144, -940, 808, -21583, 226 },
+	/* 460800 */ { 1023751, 8144, -940, 808, -21583, 226 },
+	/* 537600 */ { 1065642, 8144, -940, 808, -21583, 226 },
+	/* 614400 */ { 1107534, 8144, -940, 808, -21583, 226 },
+	/* 691200 */ { 1149425, 8144, -940, 808, -21583, 226 },
+	/* 768000 */ { 1191317, 8144, -940, 808, -21583, 226 },
+	/* 844800 */ { 1233208, 8144, -940, 808, -21583, 226 },
+	/* 921600 */ { 1275100, 8144, -940, 808, -21583, 226 },
+	/* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 },
+};
+
+const u32 speedo_to_vmin[] = {
+	/*   0,      1,      2,      3,      4, */
+	950000, 840000, 818750, 840000, 810000,
+};
+
 int
 gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 {
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	struct gk20a_volt *volt;
+	u32 vmin;
+
+	if (tdev->gpu_speedo_id >= ARRAY_SIZE(speedo_to_vmin)) {
+		nvdev_error(device, "unsupported speedo %d\n",
+			    tdev->gpu_speedo_id);
+		return -EINVAL;
+	}
 
 	volt = kzalloc(sizeof(*volt), GFP_KERNEL);
 	if (!volt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gm20b_cvb_coef,
-				ARRAY_SIZE(gm20b_cvb_coef), volt);
+	vmin = speedo_to_vmin[tdev->gpu_speedo_id];
+
+	if (tdev->gpu_speedo_id >= 1)
+		return gk20a_volt_ctor(device, index, gm20b_na_cvb_coef,
+				     ARRAY_SIZE(gm20b_na_cvb_coef), vmin, volt);
+	else
+		return gk20a_volt_ctor(device, index, gm20b_cvb_coef,
+					ARRAY_SIZE(gm20b_cvb_coef), vmin, volt);
 }
diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig
index 336ad4de9981..556f81f6b2c7 100644
--- a/drivers/gpu/drm/omapdrm/Kconfig
+++ b/drivers/gpu/drm/omapdrm/Kconfig
@@ -4,11 +4,6 @@ config DRM_OMAP
 	depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
 	select OMAP2_DSS
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
 	default n
 	help
 	  DRM display driver for OMAP2/3/4 based boards.
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
index 75f7827525cf..684b7aeda411 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
@@ -255,6 +255,7 @@ static int dvic_probe_of(struct platform_device *pdev)
 	adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0);
 	if (adapter_node) {
 		adapter = of_get_i2c_adapter_by_node(adapter_node);
+		of_node_put(adapter_node);
 		if (adapter == NULL) {
 			dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n");
 			omap_dss_put_device(ddata->in);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index 1b0cf2d8224b..0eae8afaed90 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -1284,8 +1284,7 @@ static int dsicm_probe(struct platform_device *pdev)
 	return 0;
 
 err_sysfs_create:
-	if (bldev != NULL)
-		backlight_device_unregister(bldev);
+	backlight_device_unregister(bldev);
 err_bl:
 	destroy_workqueue(ddata->workqueue);
 err_reg:
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 6f45e9d00b41..e1be5e795cd8 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	struct regulator *vdds_dsi;
-	int r;
 
 	if (dsi->vdds_dsi_reg != NULL)
 		return 0;
diff --git a/drivers/gpu/drm/omapdrm/dss/dss-of.c b/drivers/gpu/drm/omapdrm/dss/dss-of.c
index dfd4e9621e3b..e256d879b25c 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss-of.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss-of.c
@@ -125,15 +125,16 @@ u32 dss_of_port_get_port_number(struct device_node *port)
 
 static struct device_node *omapdss_of_get_remote_port(const struct device_node *node)
 {
-	struct device_node *np;
+	struct device_node *np, *np_parent;
 
 	np = of_parse_phandle(node, "remote-endpoint", 0);
 	if (!np)
 		return NULL;
 
-	np = of_get_next_parent(np);
+	np_parent = of_get_next_parent(np);
+	of_node_put(np);
 
-	return np;
+	return np_parent;
 }
 
 struct device_node *
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index 061f9bab4c9b..0c0a5139a301 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -120,7 +120,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data)
 
 static int hdmi_init_regulator(void)
 {
-	int r;
 	struct regulator *reg;
 
 	if (hdmi.vdda_reg != NULL)
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 983c8cf2441c..31f5178c22c7 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -115,8 +115,8 @@ static void omap_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	for (i = 0; i < n; i++) {
 		struct plane *plane = &omap_fb->planes[i];
-		if (plane->bo)
-			drm_gem_object_unreference_unlocked(plane->bo);
+
+		drm_gem_object_unreference_unlocked(plane->bo);
 	}
 
 	kfree(omap_fb);
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 3a7bdf1c842b..85143d1b9b31 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -168,6 +168,7 @@ static int panel_simple_disable(struct drm_panel *panel)
 
 	if (p->backlight) {
 		p->backlight->props.power = FB_BLANK_POWERDOWN;
+		p->backlight->props.state |= BL_CORE_FBBLANK;
 		backlight_update_status(p->backlight);
 	}
 
@@ -235,6 +236,7 @@ static int panel_simple_enable(struct drm_panel *panel)
 		msleep(p->desc->delay.enable);
 
 	if (p->backlight) {
+		p->backlight->props.state &= ~BL_CORE_FBBLANK;
 		p->backlight->props.power = FB_BLANK_UNBLANK;
 		backlight_update_status(p->backlight);
 	}
@@ -964,8 +966,8 @@ static const struct panel_desc innolux_zj070na_01p = {
 	.num_modes = 1,
 	.bpc = 6,
 	.size = {
-		.width = 1024,
-		.height = 600,
+		.width = 154,
+		.height = 90,
 	},
 };
 
@@ -1017,6 +1019,51 @@ static const struct panel_desc lg_lb070wv8 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode lg_lp079qx1_sp0v_mode = {
+	.clock = 200000,
+	.hdisplay = 1536,
+	.hsync_start = 1536 + 12,
+	.hsync_end = 1536 + 12 + 16,
+	.htotal = 1536 + 12 + 16 + 48,
+	.vdisplay = 2048,
+	.vsync_start = 2048 + 8,
+	.vsync_end = 2048 + 8 + 4,
+	.vtotal = 2048 + 8 + 4 + 8,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc lg_lp079qx1_sp0v = {
+	.modes = &lg_lp079qx1_sp0v_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 129,
+		.height = 171,
+	},
+};
+
+static const struct drm_display_mode lg_lp097qx1_spa1_mode = {
+	.clock = 205210,
+	.hdisplay = 2048,
+	.hsync_start = 2048 + 150,
+	.hsync_end = 2048 + 150 + 5,
+	.htotal = 2048 + 150 + 5 + 5,
+	.vdisplay = 1536,
+	.vsync_start = 1536 + 3,
+	.vsync_end = 1536 + 3 + 1,
+	.vtotal = 1536 + 3 + 1 + 9,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc lg_lp097qx1_spa1 = {
+	.modes = &lg_lp097qx1_spa1_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 208,
+		.height = 147,
+	},
+};
+
 static const struct drm_display_mode lg_lp120up1_mode = {
 	.clock = 162300,
 	.hdisplay = 1920,
@@ -1224,6 +1271,28 @@ static const struct panel_desc qd43003c0_40 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 };
 
+static const struct drm_display_mode samsung_lsn122dl01_c01_mode = {
+	.clock = 271560,
+	.hdisplay = 2560,
+	.hsync_start = 2560 + 48,
+	.hsync_end = 2560 + 48 + 32,
+	.htotal = 2560 + 48 + 32 + 80,
+	.vdisplay = 1600,
+	.vsync_start = 1600 + 2,
+	.vsync_end = 1600 + 2 + 5,
+	.vtotal = 1600 + 2 + 5 + 57,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc samsung_lsn122dl01_c01 = {
+	.modes = &samsung_lsn122dl01_c01_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 263,
+		.height = 164,
+	},
+};
+
 static const struct drm_display_mode samsung_ltn101nt05_mode = {
 	.clock = 54030,
 	.hdisplay = 1024,
@@ -1242,8 +1311,8 @@ static const struct panel_desc samsung_ltn101nt05 = {
 	.num_modes = 1,
 	.bpc = 6,
 	.size = {
-		.width = 1024,
-		.height = 600,
+		.width = 223,
+		.height = 125,
 	},
 };
 
@@ -1270,6 +1339,53 @@ static const struct panel_desc samsung_ltn140at29_301 = {
 	},
 };
 
+static const struct display_timing sharp_lq101k1ly04_timing = {
+	.pixelclock = { 60000000, 65000000, 80000000 },
+	.hactive = { 1280, 1280, 1280 },
+	.hfront_porch = { 20, 20, 20 },
+	.hback_porch = { 20, 20, 20 },
+	.hsync_len = { 10, 10, 10 },
+	.vactive = { 800, 800, 800 },
+	.vfront_porch = { 4, 4, 4 },
+	.vback_porch = { 4, 4, 4 },
+	.vsync_len = { 4, 4, 4 },
+	.flags = DISPLAY_FLAGS_PIXDATA_POSEDGE,
+};
+
+static const struct panel_desc sharp_lq101k1ly04 = {
+	.timings = &sharp_lq101k1ly04_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 217,
+		.height = 136,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA,
+};
+
+static const struct drm_display_mode sharp_lq123p1jx31_mode = {
+	.clock = 252750,
+	.hdisplay = 2400,
+	.hsync_start = 2400 + 48,
+	.hsync_end = 2400 + 48 + 32,
+	.htotal = 2400 + 48 + 32 + 80,
+	.vdisplay = 1600,
+	.vsync_start = 1600 + 3,
+	.vsync_end = 1600 + 3 + 10,
+	.vtotal = 1600 + 3 + 10 + 33,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc sharp_lq123p1jx31 = {
+	.modes = &sharp_lq123p1jx31_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 259,
+		.height = 173,
+	},
+};
+
 static const struct drm_display_mode shelly_sca07010_bfn_lnn_mode = {
 	.clock = 33300,
 	.hdisplay = 800,
@@ -1293,6 +1409,29 @@ static const struct panel_desc shelly_sca07010_bfn_lnn = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct drm_display_mode starry_kr122ea0sra_mode = {
+	.clock = 147000,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 16,
+	.hsync_end = 1920 + 16 + 16,
+	.htotal = 1920 + 16 + 16 + 32,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 15,
+	.vsync_end = 1200 + 15 + 2,
+	.vtotal = 1200 + 15 + 2 + 18,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc starry_kr122ea0sra = {
+	.modes = &starry_kr122ea0sra_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 263,
+		.height = 164,
+	},
+};
+
 static const struct drm_display_mode tpk_f07a_0102_mode = {
 	.clock = 33260,
 	.hdisplay = 800,
@@ -1457,6 +1596,12 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "lg,lb070wv8",
 		.data = &lg_lb070wv8,
 	}, {
+		.compatible = "lg,lp079qx1-sp0v",
+		.data = &lg_lp079qx1_sp0v,
+	}, {
+		.compatible = "lg,lp097qx1-spa1",
+		.data = &lg_lp097qx1_spa1,
+	}, {
 		.compatible = "lg,lp120up1",
 		.data = &lg_lp120up1,
 	}, {
@@ -1481,15 +1626,27 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "qiaodian,qd43003c0-40",
 		.data = &qd43003c0_40,
 	}, {
+		.compatible = "samsung,lsn122dl01-c01",
+		.data = &samsung_lsn122dl01_c01,
+	}, {
 		.compatible = "samsung,ltn101nt05",
 		.data = &samsung_ltn101nt05,
 	}, {
 		.compatible = "samsung,ltn140at29-301",
 		.data = &samsung_ltn140at29_301,
 	}, {
+		.compatible = "sharp,lq101k1ly04",
+		.data = &sharp_lq101k1ly04,
+	}, {
+		.compatible = "sharp,lq123p1jx31",
+		.data = &sharp_lq123p1jx31,
+	}, {
 		.compatible = "shelly,sca07010-bfn-lnn",
 		.data = &shelly_sca07010_bfn_lnn,
 	}, {
+		.compatible = "starry,kr122ea0sra",
+		.data = &starry_kr122ea0sra,
+	}, {
 		.compatible = "tpk,f07a-0102",
 		.data = &tpk_f07a_0102,
 	}, {
@@ -1701,7 +1858,6 @@ static const struct panel_desc_dsi panasonic_vvx10f004b00 = {
 	.lanes = 4,
 };
 
-
 static const struct of_device_id dsi_of_match[] = {
 	{
 		.compatible = "auo,b080uan01",
diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig
index 38c2bb72e456..da45b11b66b8 100644
--- a/drivers/gpu/drm/qxl/Kconfig
+++ b/drivers/gpu/drm/qxl/Kconfig
@@ -1,12 +1,7 @@
 config DRM_QXL
 	tristate "QXL virtual GPU"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_DEFERRED_IO
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	select CRC32
 	help
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index b5d4b41361bd..04270f5d110c 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -203,7 +203,7 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas
 bool qxl_queue_garbage_collect(struct qxl_device *qdev, bool flush)
 {
 	if (!qxl_check_idle(qdev->release_ring)) {
-		queue_work(qdev->gc_queue, &qdev->gc_work);
+		schedule_work(&qdev->gc_work);
 		if (flush)
 			flush_work(&qdev->gc_work);
 		return true;
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index ad429683fef7..3aef12742a53 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -468,8 +468,7 @@ void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct qxl_framebuffer *qxl_fb = to_qxl_framebuffer(fb);
 
-	if (qxl_fb->obj)
-		drm_gem_object_unreference_unlocked(qxl_fb->obj);
+	drm_gem_object_unreference_unlocked(qxl_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(qxl_fb);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
index 56e1d633875e..ffe885395145 100644
--- a/drivers/gpu/drm/qxl/qxl_draw.c
+++ b/drivers/gpu/drm/qxl/qxl_draw.c
@@ -37,7 +37,6 @@ static int alloc_clips(struct qxl_device *qdev,
  * the qxl_clip_rects. This is *not* the same as the memory allocated
  * on the device, it is offset to qxl_clip_rects.chunk.data */
 static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev,
-					      struct qxl_drawable *drawable,
 					      unsigned num_clips,
 					      struct qxl_bo *clips_bo)
 {
@@ -136,6 +135,8 @@ static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
 				 * correctly globaly, since that would require
 				 * tracking all of our palettes. */
 	ret = qxl_bo_kmap(palette_bo, (void **)&pal);
+	if (ret)
+		return ret;
 	pal->num_ents = 2;
 	pal->unique = unique++;
 	if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {
@@ -349,7 +350,7 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 	if (ret)
 		goto out_release_backoff;
 
-	rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo);
+	rects = drawable_set_clipping(qdev, num_clips, clips_bo);
 	if (!rects)
 		goto out_release_backoff;
 
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 3ad6604b34ce..8e633caa4078 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -321,7 +321,6 @@ struct qxl_device {
 	struct qxl_bo *current_release_bo[3];
 	int current_release_bo_offset[3];
 
-	struct workqueue_struct *gc_queue;
 	struct work_struct gc_work;
 
 	struct drm_property *hotplug_mode_update_property;
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index 2319800b7add..e642242728c0 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -258,7 +258,6 @@ static int qxl_device_init(struct qxl_device *qdev,
 		 (unsigned long)qdev->surfaceram_size);
 
 
-	qdev->gc_queue = create_singlethread_workqueue("qxl_gc");
 	INIT_WORK(&qdev->gc_work, qxl_gc_work);
 
 	return 0;
@@ -270,10 +269,7 @@ static void qxl_device_fini(struct qxl_device *qdev)
 		qxl_bo_unref(&qdev->current_release_bo[0]);
 	if (qdev->current_release_bo[1])
 		qxl_bo_unref(&qdev->current_release_bo[1]);
-	flush_workqueue(qdev->gc_queue);
-	destroy_workqueue(qdev->gc_queue);
-	qdev->gc_queue = NULL;
-
+	flush_work(&qdev->gc_work);
 	qxl_ring_free(qdev->command_ring);
 	qxl_ring_free(qdev->cursor_ring);
 	qxl_ring_free(qdev->release_ring);
@@ -310,10 +306,6 @@ int qxl_driver_load(struct drm_device *dev, unsigned long flags)
 	struct qxl_device *qdev;
 	int r;
 
-	/* require kms */
-	if (!drm_core_check_feature(dev, DRIVER_MODESET))
-		return -ENODEV;
-
 	qdev = kzalloc(sizeof(struct qxl_device), GFP_KERNEL);
 	if (qdev == NULL)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 0738d74c8d04..d50c9679e631 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -350,11 +350,19 @@ static int qxl_bo_move(struct ttm_buffer_object *bo,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_mem_reg *old_mem = &bo->mem;
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		qxl_move_null(bo, new_mem);
 		return 0;
 	}
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+	return ttm_bo_move_memcpy(bo, evict, interruptible,
+				  no_wait_gpu, new_mem);
 }
 
 static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e91763d5d800..a97abc8af657 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -589,7 +589,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev) || ASIC_IS_DCE8(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		/* use frac fb div on RS780/RS880 */
-		if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		    && !radeon_crtc->ss_enabled)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
@@ -626,7 +627,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			if (radeon_crtc->ss.refdiv) {
 				radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 				radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-				if (ASIC_IS_AVIVO(rdev))
+				if (rdev->family >= CHIP_RV770)
 					radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 587cae4e73c9..56bb758f4e33 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -120,6 +120,7 @@ atombios_set_backlight_level(struct radeon_encoder *radeon_encoder, u8 level)
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 			if (dig->backlight_level == 0)
 				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
 			else {
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 35e0fc3ae8a7..7ba450832e6b 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -3843,7 +3843,10 @@ static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev,
 	if (i >= sclk_table->count) {
 		pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
 	} else {
-		/* XXX check display min clock requirements */
+		/* XXX The current code always reprogrammed the sclk levels,
+		 * but we don't currently handle disp sclk requirements
+		 * so just skip it.
+		 */
 		if (CISLAND_MINIMUM_ENGINE_CLOCK != CISLAND_MINIMUM_ENGINE_CLOCK)
 			pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK;
 	}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ba192a35c607..0c1b9ff433af 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -53,6 +53,7 @@ MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
@@ -72,6 +73,7 @@ MODULE_FIRMWARE("radeon/hawaii_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
@@ -1990,12 +1992,17 @@ static int cik_init_microcode(struct radeon_device *rdev)
 	int new_fw = 0;
 	int err;
 	int num_fw;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
 		chip_name = "BONAIRE";
+		if ((rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x665f))
+			new_smc = true;
 		new_chip_name = "bonaire";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2010,6 +2017,8 @@ static int cik_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAWAII:
 		chip_name = "HAWAII";
+		if (rdev->pdev->revision == 0x80)
+			new_smc = true;
 		new_chip_name = "hawaii";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2259,7 +2268,10 @@ static int cik_init_microcode(struct radeon_device *rdev)
 			}
 		}
 
-		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+		if (new_smc)
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+		else
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 		if (err) {
 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
@@ -8354,7 +8366,8 @@ static int cik_startup(struct radeon_device *rdev)
 		}
 	}
 	rdev->rlc.cs_data = ci_cs_data;
-	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
 	r = sumo_rlc_init(rdev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 0d3f744de35a..d960d3915408 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2209,6 +2209,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 		break;
 	}
+	case PACKET3_PFP_SYNC_ME:
+		if (pkt->count) {
+			DRM_ERROR("bad PFP_SYNC_ME\n");
+			return -EINVAL;
+		}
+		break;
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
@@ -3381,6 +3387,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 	case PACKET3_MPEG_INDEX:
 	case PACKET3_WAIT_REG_MEM:
 	case PACKET3_MEM_WRITE:
+	case PACKET3_PFP_SYNC_ME:
 	case PACKET3_SURFACE_SYNC:
 	case PACKET3_EVENT_WRITE:
 	case PACKET3_EVENT_WRITE_EOP:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 0b174e14e9a6..c8e3d394cde7 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -1624,6 +1624,7 @@
 		 */
 #              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
 #              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
+#define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 59acd0e5c2c6..31c9a92d6a1b 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -741,13 +741,6 @@ int radeon_acpi_init(struct radeon_device *rdev)
 		}
 
 		atif->encoder_for_bl = target;
-		if (!target) {
-			/* Brightness change notification is enabled, but we
-			 * didn't find a backlight controller, this should
-			 * never happen.
-			 */
-			DRM_ERROR("Cannot find a backlight controller\n");
-		}
 	}
 
 	if (atif->functions.sbios_requests && !atif->functions.system_params) {
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index f8097a0e7a79..5df3ec73021b 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1155,7 +1155,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		    le16_to_cpu(firmware_info->info.usReferenceClock);
 		p1pll->reference_div = 0;
 
-		if (crev < 2)
+		if ((frev < 2) && (crev < 2))
 			p1pll->pll_out_min =
 				le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
 		else
@@ -1164,7 +1164,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		p1pll->pll_out_max =
 		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
 
-		if (crev >= 4) {
+		if (((frev < 2) && (crev >= 4)) || (frev >= 2)) {
 			p1pll->lcd_pll_out_min =
 				le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
 			if (p1pll->lcd_pll_out_min == 0)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 95f4fea89302..6de342861202 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "radeon_acpi.h"
 
@@ -27,6 +28,7 @@ struct radeon_atpx_functions {
 struct radeon_atpx {
 	acpi_handle handle;
 	struct radeon_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct radeon_atpx_priv {
@@ -62,6 +64,14 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool radeon_is_atpx_hybrid(void) {
+	return radeon_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,18 +151,12 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = radeon_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -171,19 +175,42 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -258,6 +285,10 @@ static int radeon_atpx_set_discrete_state(struct radeon_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
@@ -505,7 +536,6 @@ static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 static const struct vga_switcheroo_handler radeon_atpx_handler = {
 	.switchto = radeon_atpx_switchto,
 	.power_state = radeon_atpx_power_state,
-	.init = radeon_atpx_init,
 	.get_client_id = radeon_atpx_get_client_id,
 };
 
@@ -541,6 +571,7 @@ static bool radeon_atpx_detect(void)
 		printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		radeon_atpx_priv.atpx_detected = true;
+		radeon_atpx_init();
 		return true;
 	}
 	return false;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 81a63d7f5cd9..b79f3b002471 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -2064,7 +2064,6 @@ radeon_add_atom_connector(struct drm_device *dev,
 							   RADEON_OUTPUT_CSC_BYPASS);
 			/* no HPD on analog connectors */
 			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -2314,8 +2313,10 @@ radeon_add_atom_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
@@ -2391,7 +2392,6 @@ radeon_add_legacy_connector(struct drm_device *dev,
 					      1);
 		/* no HPD on analog connectors */
 		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 		break;
@@ -2476,10 +2476,13 @@ radeon_add_legacy_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
+
 	connector->display_info.subpixel_order = subpixel_order;
 	drm_connector_register(connector);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e721e6b2766e..a00dd2f74527 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -30,6 +30,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/radeon_drm.h>
+#include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
@@ -630,6 +631,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 /*
  * GPU helpers function.
  */
+
+/**
+ * radeon_device_is_virtual - check if we are running is a virtual environment
+ *
+ * Check if the asic has been passed through to a VM (all asics).
+ * Used at driver startup.
+ * Returns true if virtual or false if not.
+ */
+static bool radeon_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * radeon_card_posted - check if the hw has already been initialized
  *
@@ -643,6 +661,10 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
+	/* for pass through, always force asic_init */
+	if (radeon_device_is_virtual())
+		return false;
+
 	/* required for EFI mode on macbook2,1 which uses an r5xx asic */
 	if (efi_enabled(EFI_BOOT) &&
 	    (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
@@ -1505,6 +1527,9 @@ int radeon_device_init(struct radeon_device *rdev,
 	return 0;
 
 failed:
+	/* balance pm_runtime_get_sync() in radeon_driver_unload_kms() */
+	if (radeon_is_px(ddev))
+		pm_runtime_put_noidle(ddev->dev);
 	if (runtime)
 		vga_switcheroo_fini_domain_pm_ops(rdev->dev);
 	return r;
@@ -1631,7 +1656,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
 	radeon_agp_suspend(rdev);
 
 	pci_save_state(dev->pdev);
-	if (freeze && rdev->family >= CHIP_R600) {
+	if (freeze && rdev->family >= CHIP_CEDAR) {
 		rdev->asic->asic_reset(rdev, true);
 		pci_restore_state(dev->pdev);
 	} else if (suspend) {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 3965d1916b9c..c3206fb8f4cf 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -714,7 +714,7 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
-	radeon_crtc->flip_queue = create_singlethread_workqueue("radeon-crtc");
+	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
@@ -1324,9 +1324,7 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
 
-	if (radeon_fb->obj) {
-		drm_gem_object_unreference_unlocked(radeon_fb->obj);
-	}
+	drm_gem_object_unreference_unlocked(radeon_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(radeon_fb);
 }
@@ -1711,6 +1709,7 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 		radeon_afmt_fini(rdev);
 		drm_kms_helper_poll_fini(rdev->ddev);
 		radeon_hpd_fini(rdev);
+		drm_crtc_force_disable_all(rdev->ddev);
 		drm_mode_config_cleanup(rdev->ddev);
 		rdev->mode_info.mode_config_initialized = false;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a455dc7d4aa1..c01a7c6abb49 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -93,9 +93,10 @@
  *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
  *   2.44.0 - SET_APPEND_CNT packet3 support
  *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
+ *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	45
+#define KMS_DRIVER_MINOR	46
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -162,9 +163,13 @@ void radeon_debugfs_cleanup(struct drm_minor *minor);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void radeon_register_atpx_handler(void);
 void radeon_unregister_atpx_handler(void);
+bool radeon_has_atpx_dgpu_power_cntl(void);
+bool radeon_is_atpx_hybrid(void);
 #else
 static inline void radeon_register_atpx_handler(void) {}
 static inline void radeon_unregister_atpx_handler(void) {}
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool radeon_is_atpx_hybrid(void) { return false; }
 #endif
 
 int radeon_no_wb;
@@ -404,7 +409,10 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (radeon_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -421,7 +429,9 @@ static int radeon_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (radeon_is_atpx_hybrid() ||
+	    !radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 414953c46a38..835563c1f0ed 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -63,7 +63,10 @@ int radeon_driver_unload_kms(struct drm_device *dev)
 	if (rdev->rmmio == NULL)
 		goto done_free;
 
-	pm_runtime_get_sync(dev->dev);
+	if (radeon_is_px(dev)) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
 
 	radeon_kfd_device_fini(rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 590b0377fbe2..ffdad81ef964 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -300,8 +300,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
-	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
-				      evict, no_wait_gpu, new_mem);
+	r = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
 }
@@ -403,6 +402,10 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	r = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (r)
+		return r;
+
 	/* Can't move a pinned BO */
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	if (WARN_ON_ONCE(rbo->pin_count > 0))
@@ -441,7 +444,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index b30e719dd56d..2523ca96c6c7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -50,6 +50,7 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin");
 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
+MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
@@ -65,6 +66,7 @@ MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
+MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
 MODULE_FIRMWARE("radeon/VERDE_me.bin");
@@ -80,6 +82,7 @@ MODULE_FIRMWARE("radeon/verde_ce.bin");
 MODULE_FIRMWARE("radeon/verde_mc.bin");
 MODULE_FIRMWARE("radeon/verde_rlc.bin");
 MODULE_FIRMWARE("radeon/verde_smc.bin");
+MODULE_FIRMWARE("radeon/verde_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
 MODULE_FIRMWARE("radeon/OLAND_me.bin");
@@ -95,6 +98,7 @@ MODULE_FIRMWARE("radeon/oland_ce.bin");
 MODULE_FIRMWARE("radeon/oland_mc.bin");
 MODULE_FIRMWARE("radeon/oland_rlc.bin");
 MODULE_FIRMWARE("radeon/oland_smc.bin");
+MODULE_FIRMWARE("radeon/oland_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
@@ -110,6 +114,7 @@ MODULE_FIRMWARE("radeon/hainan_ce.bin");
 MODULE_FIRMWARE("radeon/hainan_mc.bin");
 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
 MODULE_FIRMWARE("radeon/hainan_smc.bin");
+MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 
 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
@@ -1653,12 +1658,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 	char fw_name[30];
 	int err;
 	int new_fw = 0;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
 		chip_name = "TAHITI";
+		/* XXX: figure out which Tahitis need the new ucode */
+		if (0)
+			new_smc = true;
 		new_chip_name = "tahiti";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1670,6 +1679,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_PITCAIRN:
 		chip_name = "PITCAIRN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x6810) ||
+		    (rdev->pdev->device == 0x6811) ||
+		    (rdev->pdev->device == 0x6816) ||
+		    (rdev->pdev->device == 0x6817) ||
+		    (rdev->pdev->device == 0x6806))
+			new_smc = true;
 		new_chip_name = "pitcairn";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1681,6 +1697,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_VERDE:
 		chip_name = "VERDE";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0x87) ||
+		    (rdev->pdev->device == 0x6820) ||
+		    (rdev->pdev->device == 0x6821) ||
+		    (rdev->pdev->device == 0x6822) ||
+		    (rdev->pdev->device == 0x6823) ||
+		    (rdev->pdev->device == 0x682A) ||
+		    (rdev->pdev->device == 0x682B))
+			new_smc = true;
 		new_chip_name = "verde";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1692,6 +1718,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_OLAND:
 		chip_name = "OLAND";
+		if ((rdev->pdev->revision == 0xC7) ||
+		    (rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->device == 0x6604) ||
+		    (rdev->pdev->device == 0x6605))
+			new_smc = true;
 		new_chip_name = "oland";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1702,6 +1735,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAINAN:
 		chip_name = "HAINAN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0xC3) ||
+		    (rdev->pdev->device == 0x6664) ||
+		    (rdev->pdev->device == 0x6665) ||
+		    (rdev->pdev->device == 0x6667))
+			new_smc = true;
 		new_chip_name = "hainan";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1847,7 +1887,10 @@ static int si_init_microcode(struct radeon_device *rdev)
 		}
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+	if (new_smc)
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 	if (err) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 7fc3ca5ce6c7..4c2fd056dd6d 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -6,7 +6,6 @@ config DRM_RCAR_DU
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have an R-Car chipset.
diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile
index 827711e28226..d3b44651061a 100644
--- a/drivers/gpu/drm/rcar-du/Makefile
+++ b/drivers/gpu/drm/rcar-du/Makefile
@@ -7,8 +7,8 @@ rcar-du-drm-y := rcar_du_crtc.o \
 		 rcar_du_plane.o \
 		 rcar_du_vgacon.o
 
-rcar-du-drm-$(CONFIG_DRM_RCAR_HDMI)	+= rcar_du_hdmicon.o \
-					   rcar_du_hdmienc.o
+rcar-du-drm-$(CONFIG_DRM_RCAR_HDMI)	+= rcar_du_hdmienc.o
+
 rcar-du-drm-$(CONFIG_DRM_RCAR_LVDS)	+= rcar_du_lvdsenc.o
 
 rcar-du-drm-$(CONFIG_DRM_RCAR_VSP)	+= rcar_du_vsp.o
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 0d8bdda736f9..e39fcef2e033 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -552,7 +552,7 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
 	rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
 
 	if (status & DSSR_FRM) {
-		drm_handle_vblank(rcrtc->crtc.dev, rcrtc->index);
+		drm_crtc_handle_vblank(&rcrtc->crtc);
 		rcar_du_crtc_finish_page_flip(rcrtc);
 		ret = IRQ_HANDLED;
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index 55149e9ce28e..ab8645c57e2d 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -19,7 +19,6 @@
 
 #include "rcar_du_drv.h"
 #include "rcar_du_encoder.h"
-#include "rcar_du_hdmicon.h"
 #include "rcar_du_hdmienc.h"
 #include "rcar_du_kms.h"
 #include "rcar_du_lvdscon.h"
@@ -174,7 +173,7 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
 		break;
 
 	case DRM_MODE_ENCODER_TMDS:
-		ret = rcar_du_hdmi_connector_init(rcdu, renc);
+		/* connector managed by the bridge driver */
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
index a8669c3e0dd5..7fc10a9c34c3 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
@@ -15,7 +15,6 @@
 #define __RCAR_DU_ENCODER_H__
 
 #include <drm/drm_crtc.h>
-#include <drm/drm_encoder_slave.h>
 
 struct rcar_du_device;
 struct rcar_du_hdmienc;
@@ -30,16 +29,16 @@ enum rcar_du_encoder_type {
 };
 
 struct rcar_du_encoder {
-	struct drm_encoder_slave slave;
+	struct drm_encoder base;
 	enum rcar_du_output output;
 	struct rcar_du_hdmienc *hdmi;
 	struct rcar_du_lvdsenc *lvds;
 };
 
 #define to_rcar_encoder(e) \
-	container_of(e, struct rcar_du_encoder, slave.base)
+	container_of(e, struct rcar_du_encoder, base)
 
-#define rcar_encoder_to_drm_encoder(e)	(&(e)->slave.base)
+#define rcar_encoder_to_drm_encoder(e)	(&(e)->base)
 
 struct rcar_du_connector {
 	struct drm_connector connector;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
deleted file mode 100644
index 612b4d5ae098..000000000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * R-Car Display Unit HDMI Connector
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_encoder_slave.h>
-
-#include "rcar_du_drv.h"
-#include "rcar_du_encoder.h"
-#include "rcar_du_hdmicon.h"
-#include "rcar_du_kms.h"
-
-#define to_slave_funcs(e)	(to_rcar_encoder(e)->slave.slave_funcs)
-
-static int rcar_du_hdmi_connector_get_modes(struct drm_connector *connector)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->get_modes == NULL)
-		return 0;
-
-	return sfuncs->get_modes(encoder, connector);
-}
-
-static int rcar_du_hdmi_connector_mode_valid(struct drm_connector *connector,
-					     struct drm_display_mode *mode)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->mode_valid == NULL)
-		return MODE_OK;
-
-	return sfuncs->mode_valid(encoder, mode);
-}
-
-static const struct drm_connector_helper_funcs connector_helper_funcs = {
-	.get_modes = rcar_du_hdmi_connector_get_modes,
-	.mode_valid = rcar_du_hdmi_connector_mode_valid,
-};
-
-static enum drm_connector_status
-rcar_du_hdmi_connector_detect(struct drm_connector *connector, bool force)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->detect == NULL)
-		return connector_status_unknown;
-
-	return sfuncs->detect(encoder, connector);
-}
-
-static const struct drm_connector_funcs connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
-	.reset = drm_atomic_helper_connector_reset,
-	.detect = rcar_du_hdmi_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = drm_connector_cleanup,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
-};
-
-int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-				struct rcar_du_encoder *renc)
-{
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(renc);
-	struct rcar_du_connector *rcon;
-	struct drm_connector *connector;
-	int ret;
-
-	rcon = devm_kzalloc(rcdu->dev, sizeof(*rcon), GFP_KERNEL);
-	if (rcon == NULL)
-		return -ENOMEM;
-
-	connector = &rcon->connector;
-	connector->display_info.width_mm = 0;
-	connector->display_info.height_mm = 0;
-	connector->interlace_allowed = true;
-	connector->polled = DRM_CONNECTOR_POLL_HPD;
-
-	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
-				 DRM_MODE_CONNECTOR_HDMIA);
-	if (ret < 0)
-		return ret;
-
-	drm_connector_helper_add(connector, &connector_helper_funcs);
-
-	connector->dpms = DRM_MODE_DPMS_OFF;
-	drm_object_property_set_value(&connector->base,
-		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
-
-	ret = drm_mode_connector_attach_encoder(connector, encoder);
-	if (ret < 0)
-		return ret;
-
-	rcon->encoder = renc;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h
deleted file mode 100644
index 87daa949227f..000000000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * R-Car Display Unit HDMI Connector
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __RCAR_DU_HDMICON_H__
-#define __RCAR_DU_HDMICON_H__
-
-struct rcar_du_device;
-struct rcar_du_encoder;
-
-#if IS_ENABLED(CONFIG_DRM_RCAR_HDMI)
-int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-				struct rcar_du_encoder *renc);
-#else
-static inline int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-					      struct rcar_du_encoder *renc)
-{
-	return -ENOSYS;
-}
-#endif
-
-#endif /* __RCAR_DU_HDMICON_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 461662d231e2..4de3ff0dbebd 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -16,7 +16,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_encoder_slave.h>
 
 #include "rcar_du_drv.h"
 #include "rcar_du_encoder.h"
@@ -25,20 +24,14 @@
 
 struct rcar_du_hdmienc {
 	struct rcar_du_encoder *renc;
-	struct device *dev;
 	bool enabled;
 };
 
 #define to_rcar_hdmienc(e)	(to_rcar_encoder(e)->hdmi)
-#define to_slave_funcs(e)	(to_rcar_encoder(e)->slave.slave_funcs)
 
 static void rcar_du_hdmienc_disable(struct drm_encoder *encoder)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->dpms)
-		sfuncs->dpms(encoder, DRM_MODE_DPMS_OFF);
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_enable(hdmienc->renc->lvds, encoder->crtc,
@@ -50,15 +43,11 @@ static void rcar_du_hdmienc_disable(struct drm_encoder *encoder)
 static void rcar_du_hdmienc_enable(struct drm_encoder *encoder)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_enable(hdmienc->renc->lvds, encoder->crtc,
 				       true);
 
-	if (sfuncs->dpms)
-		sfuncs->dpms(encoder, DRM_MODE_DPMS_ON);
-
 	hdmienc->enabled = true;
 }
 
@@ -67,29 +56,21 @@ static int rcar_du_hdmienc_atomic_check(struct drm_encoder *encoder,
 					struct drm_connector_state *conn_state)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
 	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
-	const struct drm_display_mode *mode = &crtc_state->mode;
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_atomic_check(hdmienc->renc->lvds,
 					     adjusted_mode);
 
-	if (sfuncs->mode_fixup == NULL)
-		return 0;
-
-	return sfuncs->mode_fixup(encoder, mode, adjusted_mode) ? 0 : -EINVAL;
+	return 0;
 }
 
+
 static void rcar_du_hdmienc_mode_set(struct drm_encoder *encoder,
 				     struct drm_display_mode *mode,
 				     struct drm_display_mode *adjusted_mode)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->mode_set)
-		sfuncs->mode_set(encoder, mode, adjusted_mode);
 
 	rcar_du_crtc_route_output(encoder->crtc, hdmienc->renc->output);
 }
@@ -109,7 +90,6 @@ static void rcar_du_hdmienc_cleanup(struct drm_encoder *encoder)
 		rcar_du_hdmienc_disable(encoder);
 
 	drm_encoder_cleanup(encoder);
-	put_device(hdmienc->dev);
 }
 
 static const struct drm_encoder_funcs encoder_funcs = {
@@ -120,8 +100,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 			 struct rcar_du_encoder *renc, struct device_node *np)
 {
 	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(renc);
-	struct drm_i2c_encoder_driver *driver;
-	struct i2c_client *i2c_slave;
+	struct drm_bridge *bridge;
 	struct rcar_du_hdmienc *hdmienc;
 	int ret;
 
@@ -129,44 +108,29 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 	if (hdmienc == NULL)
 		return -ENOMEM;
 
-	/* Locate the slave I2C device and driver. */
-	i2c_slave = of_find_i2c_device_by_node(np);
-	if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) {
-		dev_dbg(rcdu->dev,
-			"can't get I2C slave for %s, deferring probe\n",
-			of_node_full_name(np));
+	/* Locate drm bridge from the hdmi encoder DT node */
+	bridge = of_drm_find_bridge(np);
+	if (!bridge)
 		return -EPROBE_DEFER;
-	}
-
-	hdmienc->dev = &i2c_slave->dev;
-
-	if (hdmienc->dev->driver == NULL) {
-		dev_dbg(rcdu->dev,
-			"I2C slave %s not probed yet, deferring probe\n",
-			dev_name(hdmienc->dev));
-		ret = -EPROBE_DEFER;
-		goto error;
-	}
-
-	/* Initialize the slave encoder. */
-	driver = to_drm_i2c_encoder_driver(to_i2c_driver(hdmienc->dev->driver));
-	ret = driver->encoder_init(i2c_slave, rcdu->ddev, &renc->slave);
-	if (ret < 0)
-		goto error;
 
 	ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
 			       DRM_MODE_ENCODER_TMDS, NULL);
 	if (ret < 0)
-		goto error;
+		return ret;
 
 	drm_encoder_helper_add(encoder, &encoder_helper_funcs);
 
 	renc->hdmi = hdmienc;
 	hdmienc->renc = renc;
 
-	return 0;
+	/* Link drm_bridge to encoder */
+	bridge->encoder = encoder;
+
+	ret = drm_bridge_attach(rcdu->ddev, bridge);
+	if (ret) {
+		drm_encoder_cleanup(encoder);
+		return ret;
+	}
 
-error:
-	put_device(hdmienc->dev);
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index e48611e83c03..3c58669a06ce 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -4,11 +4,7 @@ config DRM_ROCKCHIP
 	depends on RESET_CONTROLLER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_PANEL
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have a Rockchip soc chipset.
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index c120172add5c..89aadbf465f8 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -14,6 +14,7 @@
 
 #include <linux/component.h>
 #include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
@@ -33,13 +34,28 @@
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_vop.h"
 
+#define RK3288_GRF_SOC_CON6		0x25c
+#define RK3288_EDP_LCDC_SEL		BIT(5)
+#define RK3399_GRF_SOC_CON20		0x6250
+#define RK3399_EDP_LCDC_SEL		BIT(5)
+
+#define HIWORD_UPDATE(val, mask)	(val | (mask) << 16)
+
 #define to_dp(nm)	container_of(nm, struct rockchip_dp_device, nm)
 
-/* dp grf register offset */
-#define GRF_SOC_CON6                            0x025c
-#define GRF_EDP_LCD_SEL_MASK                    BIT(5)
-#define GRF_EDP_SEL_VOP_LIT                     BIT(5)
-#define GRF_EDP_SEL_VOP_BIG                     0
+/**
+ * struct rockchip_dp_chip_data - splite the grf setting of kind of chips
+ * @lcdsel_grf_reg: grf register offset of lcdc select
+ * @lcdsel_big: reg value of selecting vop big for eDP
+ * @lcdsel_lit: reg value of selecting vop little for eDP
+ * @chip_type: specific chip type
+ */
+struct rockchip_dp_chip_data {
+	u32	lcdsel_grf_reg;
+	u32	lcdsel_big;
+	u32	lcdsel_lit;
+	u32	chip_type;
+};
 
 struct rockchip_dp_device {
 	struct drm_device        *drm_dev;
@@ -48,9 +64,12 @@ struct rockchip_dp_device {
 	struct drm_display_mode  mode;
 
 	struct clk               *pclk;
+	struct clk               *grfclk;
 	struct regmap            *grf;
 	struct reset_control     *rst;
 
+	const struct rockchip_dp_chip_data *data;
+
 	struct analogix_dp_plat_data plat_data;
 };
 
@@ -77,6 +96,7 @@ static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data)
 	ret = rockchip_dp_pre_init(dp);
 	if (ret < 0) {
 		dev_err(dp->dev, "failed to dp pre init %d\n", ret);
+		clk_disable_unprepare(dp->pclk);
 		return ret;
 	}
 
@@ -92,6 +112,23 @@ static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data)
 	return 0;
 }
 
+static int rockchip_dp_get_modes(struct analogix_dp_plat_data *plat_data,
+				 struct drm_connector *connector)
+{
+	struct drm_display_info *di = &connector->display_info;
+	/* VOP couldn't output YUV video format for eDP rightly */
+	u32 mask = DRM_COLOR_FORMAT_YCRCB444 | DRM_COLOR_FORMAT_YCRCB422;
+
+	if ((di->color_formats & mask)) {
+		DRM_DEBUG_KMS("Swapping display color format from YUV to RGB\n");
+		di->color_formats &= ~mask;
+		di->color_formats |= DRM_COLOR_FORMAT_RGB444;
+		di->bpc = 8;
+	}
+
+	return 0;
+}
+
 static bool
 rockchip_dp_drm_encoder_mode_fixup(struct drm_encoder *encoder,
 				   const struct drm_display_mode *mode,
@@ -119,17 +156,23 @@ static void rockchip_dp_drm_encoder_enable(struct drm_encoder *encoder)
 		return;
 
 	if (ret)
-		val = GRF_EDP_SEL_VOP_LIT | (GRF_EDP_LCD_SEL_MASK << 16);
+		val = dp->data->lcdsel_lit;
 	else
-		val = GRF_EDP_SEL_VOP_BIG | (GRF_EDP_LCD_SEL_MASK << 16);
+		val = dp->data->lcdsel_big;
 
 	dev_dbg(dp->dev, "vop %s output to dp\n", (ret) ? "LIT" : "BIG");
 
-	ret = regmap_write(dp->grf, GRF_SOC_CON6, val);
-	if (ret != 0) {
-		dev_err(dp->dev, "Could not write to GRF: %d\n", ret);
+	ret = clk_prepare_enable(dp->grfclk);
+	if (ret < 0) {
+		dev_err(dp->dev, "failed to enable grfclk %d\n", ret);
 		return;
 	}
+
+	ret = regmap_write(dp->grf, dp->data->lcdsel_grf_reg, val);
+	if (ret != 0)
+		dev_err(dp->dev, "Could not write to GRF: %d\n", ret);
+
+	clk_disable_unprepare(dp->grfclk);
 }
 
 static void rockchip_dp_drm_encoder_nop(struct drm_encoder *encoder)
@@ -143,22 +186,29 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 				      struct drm_connector_state *conn_state)
 {
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+	struct rockchip_dp_device *dp = to_dp(encoder);
+	int ret;
 
 	/*
-	 * FIXME(Yakir): driver should configure the CRTC output video
-	 * mode with the display information which indicated the monitor
-	 * support colorimetry.
-	 *
-	 * But don't know why the CRTC driver seems could only output the
-	 * RGBaaa rightly. For example, if connect the "innolux,n116bge"
-	 * eDP screen, EDID would indicated that screen only accepted the
-	 * 6bpc mode. But if I configure CRTC to RGB666 output, then eDP
-	 * screen would show a blue picture (RGB888 show a green picture).
-	 * But if I configure CTRC to RGBaaa, and eDP driver still keep
-	 * RGB666 input video mode, then screen would works prefect.
+	 * The hardware IC designed that VOP must output the RGB10 video
+	 * format to eDP controller, and if eDP panel only support RGB8,
+	 * then eDP controller should cut down the video data, not via VOP
+	 * controller, that's why we need to hardcode the VOP output mode
+	 * to RGA10 here.
 	 */
+
 	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
 	s->output_type = DRM_MODE_CONNECTOR_eDP;
+	if (dp->data->chip_type == RK3399_EDP) {
+		/*
+		 * For RK3399, VOP Lit must code the out mode to RGB888,
+		 * VOP Big must code the out mode to RGB10.
+		 */
+		ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node,
+							encoder);
+		if (ret > 0)
+			s->output_mode = ROCKCHIP_OUT_MODE_P888;
+	}
 
 	return 0;
 }
@@ -192,6 +242,16 @@ static int rockchip_dp_init(struct rockchip_dp_device *dp)
 		return PTR_ERR(dp->grf);
 	}
 
+	dp->grfclk = devm_clk_get(dev, "grf");
+	if (PTR_ERR(dp->grfclk) == -ENOENT) {
+		dp->grfclk = NULL;
+	} else if (PTR_ERR(dp->grfclk) == -EPROBE_DEFER) {
+		return -EPROBE_DEFER;
+	} else if (IS_ERR(dp->grfclk)) {
+		dev_err(dev, "failed to get grf clock\n");
+		return PTR_ERR(dp->grfclk);
+	}
+
 	dp->pclk = devm_clk_get(dev, "pclk");
 	if (IS_ERR(dp->pclk)) {
 		dev_err(dev, "failed to get pclk property\n");
@@ -213,6 +273,7 @@ static int rockchip_dp_init(struct rockchip_dp_device *dp)
 	ret = rockchip_dp_pre_init(dp);
 	if (ret < 0) {
 		dev_err(dp->dev, "failed to pre init %d\n", ret);
+		clk_disable_unprepare(dp->pclk);
 		return ret;
 	}
 
@@ -246,6 +307,7 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 			    void *data)
 {
 	struct rockchip_dp_device *dp = dev_get_drvdata(dev);
+	const struct rockchip_dp_chip_data *dp_data;
 	struct drm_device *drm_dev = data;
 	int ret;
 
@@ -256,10 +318,15 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 	 */
 	dev_set_drvdata(dev, NULL);
 
+	dp_data = of_device_get_match_data(dev);
+	if (!dp_data)
+		return -ENODEV;
+
 	ret = rockchip_dp_init(dp);
 	if (ret < 0)
 		return ret;
 
+	dp->data = dp_data;
 	dp->drm_dev = drm_dev;
 
 	ret = rockchip_dp_drm_create_encoder(dp);
@@ -270,9 +337,10 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 
 	dp->plat_data.encoder = &dp->encoder;
 
-	dp->plat_data.dev_type = RK3288_DP;
+	dp->plat_data.dev_type = dp->data->chip_type;
 	dp->plat_data.power_on = rockchip_dp_poweron;
 	dp->plat_data.power_off = rockchip_dp_powerdown;
+	dp->plat_data.get_modes = rockchip_dp_get_modes;
 
 	return analogix_dp_bind(dev, dp->drm_dev, &dp->plat_data);
 }
@@ -292,38 +360,33 @@ static int rockchip_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *panel_node, *port, *endpoint;
+	struct drm_panel *panel = NULL;
 	struct rockchip_dp_device *dp;
-	struct drm_panel *panel;
 
 	port = of_graph_get_port_by_id(dev->of_node, 1);
-	if (!port) {
-		dev_err(dev, "can't find output port\n");
-		return -EINVAL;
-	}
-
-	endpoint = of_get_child_by_name(port, "endpoint");
-	of_node_put(port);
-	if (!endpoint) {
-		dev_err(dev, "no output endpoint found\n");
-		return -EINVAL;
-	}
-
-	panel_node = of_graph_get_remote_port_parent(endpoint);
-	of_node_put(endpoint);
-	if (!panel_node) {
-		dev_err(dev, "no output node found\n");
-		return -EINVAL;
-	}
-
-	panel = of_drm_find_panel(panel_node);
-	if (!panel) {
-		DRM_ERROR("failed to find panel\n");
+	if (port) {
+		endpoint = of_get_child_by_name(port, "endpoint");
+		of_node_put(port);
+		if (!endpoint) {
+			dev_err(dev, "no output endpoint found\n");
+			return -EINVAL;
+		}
+
+		panel_node = of_graph_get_remote_port_parent(endpoint);
+		of_node_put(endpoint);
+		if (!panel_node) {
+			dev_err(dev, "no output node found\n");
+			return -EINVAL;
+		}
+
+		panel = of_drm_find_panel(panel_node);
 		of_node_put(panel_node);
-		return -EPROBE_DEFER;
+		if (!panel) {
+			DRM_ERROR("failed to find panel\n");
+			return -EPROBE_DEFER;
+		}
 	}
 
-	of_node_put(panel_node);
-
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
 	if (!dp)
 		return -ENOMEM;
@@ -356,8 +419,23 @@ static const struct dev_pm_ops rockchip_dp_pm_ops = {
 #endif
 };
 
+static const struct rockchip_dp_chip_data rk3399_edp = {
+	.lcdsel_grf_reg = RK3399_GRF_SOC_CON20,
+	.lcdsel_big = HIWORD_UPDATE(0, RK3399_EDP_LCDC_SEL),
+	.lcdsel_lit = HIWORD_UPDATE(RK3399_EDP_LCDC_SEL, RK3399_EDP_LCDC_SEL),
+	.chip_type = RK3399_EDP,
+};
+
+static const struct rockchip_dp_chip_data rk3288_dp = {
+	.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
+	.lcdsel_big = HIWORD_UPDATE(0, RK3288_EDP_LCDC_SEL),
+	.lcdsel_lit = HIWORD_UPDATE(RK3288_EDP_LCDC_SEL, RK3288_EDP_LCDC_SEL),
+	.chip_type = RK3288_DP,
+};
+
 static const struct of_device_id rockchip_dp_dt_ids[] = {
-	{.compatible = "rockchip,rk3288-dp",},
+	{.compatible = "rockchip,rk3288-dp", .data = &rk3288_dp },
+	{.compatible = "rockchip,rk3399-edp", .data = &rk3399_edp },
 	{}
 };
 MODULE_DEVICE_TABLE(of, rockchip_dp_dt_ids);
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 801110f65a63..0665fb915579 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_encoder_slave.h>
 #include <drm/bridge/dw_hdmi.h>
 
 #include "rockchip_drm_drv.h"
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index d665fb04d264..a822d49a255a 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -79,7 +79,7 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 	int pipe = drm_crtc_index(crtc);
 	struct rockchip_drm_private *priv = crtc->dev->dev_private;
 
-	if (pipe > ROCKCHIP_MAX_CRTC)
+	if (pipe >= ROCKCHIP_MAX_CRTC)
 		return -EINVAL;
 
 	priv->crtc_funcs[pipe] = crtc_funcs;
@@ -92,7 +92,7 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 	int pipe = drm_crtc_index(crtc);
 	struct rockchip_drm_private *priv = crtc->dev->dev_private;
 
-	if (pipe > ROCKCHIP_MAX_CRTC)
+	if (pipe >= ROCKCHIP_MAX_CRTC)
 		return;
 
 	priv->crtc_funcs[pipe] = NULL;
@@ -257,7 +257,7 @@ static void rockchip_drm_unbind(struct device *dev)
 	dev_set_drvdata(dev, NULL);
 }
 
-void rockchip_drm_lastclose(struct drm_device *dev)
+static void rockchip_drm_lastclose(struct drm_device *dev)
 {
 	struct rockchip_drm_private *priv = dev->dev_private;
 
@@ -433,6 +433,7 @@ static int rockchip_drm_platform_probe(struct platform_device *pdev)
 			is_support_iommu = false;
 		}
 
+		of_node_put(iommu);
 		component_match_add(dev, &match, compare_of, port->parent);
 		of_node_put(port);
 	}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 20f12bc5a386..55c52734c52d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -20,6 +20,7 @@
 #include <drm/drm_crtc_helper.h>
 
 #include "rockchip_drm_drv.h"
+#include "rockchip_drm_fb.h"
 #include "rockchip_drm_gem.h"
 
 #define to_rockchip_fb(x) container_of(x, struct rockchip_drm_fb, fb)
@@ -43,14 +44,10 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
 	struct rockchip_drm_fb *rockchip_fb = to_rockchip_fb(fb);
-	struct drm_gem_object *obj;
 	int i;
 
-	for (i = 0; i < ROCKCHIP_MAX_FB_BUFFER; i++) {
-		obj = rockchip_fb->obj[i];
-		if (obj)
-			drm_gem_object_unreference_unlocked(obj);
-	}
+	for (i = 0; i < ROCKCHIP_MAX_FB_BUFFER; i++)
+		drm_gem_object_unreference_unlocked(rockchip_fb->obj[i]);
 
 	drm_framebuffer_cleanup(fb);
 	kfree(rockchip_fb);
@@ -245,7 +242,7 @@ rockchip_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_cleanup_planes(dev, state);
 }
 
-struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
+static struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
 	.atomic_commit_tail = rockchip_atomic_commit_tail,
 };
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 6255e5bcd954..91305eb7d312 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -328,9 +328,9 @@ static void scl_vop_cal_scl_fac(struct vop *vop, const struct vop_win_data *win,
 			    scl_cal_scale2(src_h, dst_h));
 		if (is_yuv) {
 			VOP_SCL_SET(vop, win, scale_cbcr_x,
-				    scl_cal_scale2(src_w, dst_w));
+				    scl_cal_scale2(cbcr_src_w, dst_w));
 			VOP_SCL_SET(vop, win, scale_cbcr_y,
-				    scl_cal_scale2(src_h, dst_h));
+				    scl_cal_scale2(cbcr_src_h, dst_h));
 		}
 		return;
 	}
@@ -798,7 +798,7 @@ static const struct drm_plane_helper_funcs plane_helper_funcs = {
 	.atomic_disable = vop_plane_atomic_disable,
 };
 
-void vop_atomic_plane_reset(struct drm_plane *plane)
+static void vop_atomic_plane_reset(struct drm_plane *plane)
 {
 	struct vop_plane_state *vop_plane_state =
 					to_vop_plane_state(plane->state);
@@ -815,7 +815,7 @@ void vop_atomic_plane_reset(struct drm_plane *plane)
 	plane->state->plane = plane;
 }
 
-struct drm_plane_state *
+static struct drm_plane_state *
 vop_atomic_plane_duplicate_state(struct drm_plane *plane)
 {
 	struct vop_plane_state *old_vop_plane_state;
@@ -1052,6 +1052,17 @@ static void vop_crtc_destroy(struct drm_crtc *crtc)
 	drm_crtc_cleanup(crtc);
 }
 
+static void vop_crtc_reset(struct drm_crtc *crtc)
+{
+	if (crtc->state)
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+	kfree(crtc->state);
+
+	crtc->state = kzalloc(sizeof(struct rockchip_crtc_state), GFP_KERNEL);
+	if (crtc->state)
+		crtc->state->crtc = crtc;
+}
+
 static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
 {
 	struct rockchip_crtc_state *rockchip_state;
@@ -1077,7 +1088,7 @@ static const struct drm_crtc_funcs vop_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.page_flip = drm_atomic_helper_page_flip,
 	.destroy = vop_crtc_destroy,
-	.reset = drm_atomic_helper_crtc_reset,
+	.reset = vop_crtc_reset,
 	.atomic_duplicate_state = vop_crtc_duplicate_state,
 	.atomic_destroy_state = vop_crtc_destroy_state,
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 3166b46a5893..919992cdc97e 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -190,7 +190,7 @@ static const struct vop_data rk3288_vop = {
 	.win_size = ARRAY_SIZE(rk3288_vop_win_data),
 };
 
-static const struct vop_scl_regs rk3066_win_scl = {
+static const struct vop_scl_regs rk3036_win_scl = {
 	.scale_yrgb_x = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 0x0),
 	.scale_yrgb_y = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 16),
 	.scale_cbcr_x = VOP_REG(RK3036_WIN0_SCL_FACTOR_CBR, 0xffff, 0x0),
@@ -198,7 +198,7 @@ static const struct vop_scl_regs rk3066_win_scl = {
 };
 
 static const struct vop_win_phy rk3036_win0_data = {
-	.scl = &rk3066_win_scl,
+	.scl = &rk3036_win_scl,
 	.data_formats = formats_win_full,
 	.nformats = ARRAY_SIZE(formats_win_full),
 	.enable = VOP_REG(RK3036_SYS_CTRL, 0x1, 0),
@@ -210,6 +210,7 @@ static const struct vop_win_phy rk3036_win0_data = {
 	.yrgb_mst = VOP_REG(RK3036_WIN0_YRGB_MST, 0xffffffff, 0),
 	.uv_mst = VOP_REG(RK3036_WIN0_CBR_MST, 0xffffffff, 0),
 	.yrgb_vir = VOP_REG(RK3036_WIN0_VIR, 0xffff, 0),
+	.uv_vir = VOP_REG(RK3036_WIN0_VIR, 0x1fff, 16),
 };
 
 static const struct vop_win_phy rk3036_win1_data = {
@@ -299,7 +300,7 @@ static int vop_remove(struct platform_device *pdev)
 	return 0;
 }
 
-struct platform_driver vop_platform_driver = {
+static struct platform_driver vop_platform_driver = {
 	.probe = vop_probe,
 	.remove = vop_remove,
 	.driver = {
diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig
index 8d17d00ddb4b..c987c826daa3 100644
--- a/drivers/gpu/drm/shmobile/Kconfig
+++ b/drivers/gpu/drm/shmobile/Kconfig
@@ -6,7 +6,6 @@ config DRM_SHMOBILE
 	select BACKLIGHT_CLASS_DEVICE
 	select BACKLIGHT_LCD_SUPPORT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	help
diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig
index 5ad43a1bb260..494ab257f77c 100644
--- a/drivers/gpu/drm/sti/Kconfig
+++ b/drivers/gpu/drm/sti/Kconfig
@@ -7,5 +7,6 @@ config DRM_STI
 	select DRM_KMS_CMA_HELPER
 	select DRM_PANEL
 	select FW_LOADER
+	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
 	  Choose this option to enable DRM on STM stiH41x chipset
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index a516eb869f6f..2da7d6866d5d 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -6,6 +6,8 @@
 
 #include "sti_awg_utils.h"
 
+#define AWG_DELAY (-5)
+
 #define AWG_OPCODE_OFFSET 10
 #define AWG_MAX_ARG       0x3ff
 
@@ -125,7 +127,7 @@ static int awg_generate_line_signal(
 		val = timing->blanking_level;
 		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
-		val = timing->trailing_pixels - 1;
+		val = timing->trailing_pixels - 1 + AWG_DELAY;
 		ret |= awg_generate_instr(SKIP, val, 0, 0, fwparams);
 	}
 
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index 794148ff0e57..134201ecc6fd 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -254,12 +254,12 @@ static int sti_compositor_probe(struct platform_device *pdev)
 	}
 
 	/* Get reset resources */
-	compo->rst_main = devm_reset_control_get(dev, "compo-main");
+	compo->rst_main = devm_reset_control_get_shared(dev, "compo-main");
 	/* Take compo main out of reset */
 	if (!IS_ERR(compo->rst_main))
 		reset_control_deassert(compo->rst_main);
 
-	compo->rst_aux = devm_reset_control_get(dev, "compo-aux");
+	compo->rst_aux = devm_reset_control_get_shared(dev, "compo-aux");
 	/* Take compo aux out of reset */
 	if (!IS_ERR(compo->rst_aux))
 		reset_control_deassert(compo->rst_aux);
@@ -267,10 +267,12 @@ static int sti_compositor_probe(struct platform_device *pdev)
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 0);
 	if (vtg_np)
 		compo->vtg_main = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 1);
 	if (vtg_np)
 		compo->vtg_aux = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	platform_set_drvdata(pdev, compo);
 
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 7fab3af7473b..c7d734dc3cf4 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -23,22 +23,11 @@
 static void sti_crtc_enable(struct drm_crtc *crtc)
 {
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
-	struct device *dev = mixer->dev;
-	struct sti_compositor *compo = dev_get_drvdata(dev);
 
 	DRM_DEBUG_DRIVER("\n");
 
 	mixer->status = STI_MIXER_READY;
 
-	/* Prepare and enable the compo IP clock */
-	if (mixer->id == STI_MIXER_MAIN) {
-		if (clk_prepare_enable(compo->clk_compo_main))
-			DRM_INFO("Failed to prepare/enable compo_main clk\n");
-	} else {
-		if (clk_prepare_enable(compo->clk_compo_aux))
-			DRM_INFO("Failed to prepare/enable compo_aux clk\n");
-	}
-
 	drm_crtc_vblank_on(crtc);
 }
 
@@ -57,9 +46,8 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct device *dev = mixer->dev;
 	struct sti_compositor *compo = dev_get_drvdata(dev);
-	struct clk *clk;
+	struct clk *compo_clk, *pix_clk;
 	int rate = mode->clock * 1000;
-	int res;
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) mode:%d (%s)\n",
 		      crtc->base.id, sti_mixer_to_str(mixer),
@@ -74,32 +62,46 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		      mode->vsync_start, mode->vsync_end,
 		      mode->vtotal, mode->type, mode->flags);
 
-	/* Set rate and prepare/enable pixel clock */
-	if (mixer->id == STI_MIXER_MAIN)
-		clk = compo->clk_pix_main;
-	else
-		clk = compo->clk_pix_aux;
+	if (mixer->id == STI_MIXER_MAIN) {
+		compo_clk = compo->clk_compo_main;
+		pix_clk = compo->clk_pix_main;
+	} else {
+		compo_clk = compo->clk_compo_aux;
+		pix_clk = compo->clk_pix_aux;
+	}
+
+	/* Prepare and enable the compo IP clock */
+	if (clk_prepare_enable(compo_clk)) {
+		DRM_INFO("Failed to prepare/enable compositor clk\n");
+		goto compo_error;
+	}
 
-	res = clk_set_rate(clk, rate);
-	if (res < 0) {
+	/* Set rate and prepare/enable pixel clock */
+	if (clk_set_rate(pix_clk, rate) < 0) {
 		DRM_ERROR("Cannot set rate (%dHz) for pix clk\n", rate);
-		return -EINVAL;
+		goto pix_error;
 	}
-	if (clk_prepare_enable(clk)) {
+	if (clk_prepare_enable(pix_clk)) {
 		DRM_ERROR("Failed to prepare/enable pix clk\n");
-		return -EINVAL;
+		goto pix_error;
 	}
 
 	sti_vtg_set_config(mixer->id == STI_MIXER_MAIN ?
 			compo->vtg_main : compo->vtg_aux, &crtc->mode);
 
-	res = sti_mixer_active_video_area(mixer, &crtc->mode);
-	if (res) {
+	if (sti_mixer_active_video_area(mixer, &crtc->mode)) {
 		DRM_ERROR("Can't set active video area\n");
-		return -EINVAL;
+		goto mixer_error;
 	}
 
-	return res;
+	return 0;
+
+mixer_error:
+	clk_disable_unprepare(pix_clk);
+pix_error:
+	clk_disable_unprepare(compo_clk);
+compo_error:
+	return -EINVAL;
 }
 
 static void sti_crtc_disable(struct drm_crtc *crtc)
@@ -130,7 +132,6 @@ static void sti_crtc_disable(struct drm_crtc *crtc)
 static void
 sti_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
-	sti_crtc_enable(crtc);
 	sti_crtc_mode_set(crtc, &crtc->state->adjusted_mode);
 }
 
@@ -221,9 +222,7 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 static const struct drm_crtc_helper_funcs sti_crtc_helper_funcs = {
 	.enable = sti_crtc_enable,
 	.disable = sti_crtc_disabling,
-	.mode_set = drm_helper_crtc_mode_set,
 	.mode_set_nofb = sti_crtc_mode_set_nofb,
-	.mode_set_base = drm_helper_crtc_mode_set_base,
 	.atomic_begin = sti_crtc_atomic_begin,
 	.atomic_flush = sti_crtc_atomic_flush,
 };
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index ec3108074350..00881eb4536e 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -580,6 +580,7 @@ static int sti_dvo_probe(struct platform_device *pdev)
 	dvo->panel_node = of_parse_phandle(np, "sti,panel", 0);
 	if (!dvo->panel_node)
 		DRM_ERROR("No panel associated to the dvo output\n");
+	of_node_put(dvo->panel_node);
 
 	platform_set_drvdata(pdev, dvo);
 
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 8d1402b245bf..fedc17f98d9b 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -18,6 +18,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
+#include <sound/hdmi-codec.h>
+
 #include "sti_hdmi.h"
 #include "sti_hdmi_tx3g4c28phy.h"
 #include "sti_hdmi_tx3g0c55phy.h"
@@ -35,6 +37,8 @@
 #define HDMI_DFLT_CHL0_DAT              0x0110
 #define HDMI_DFLT_CHL1_DAT              0x0114
 #define HDMI_DFLT_CHL2_DAT              0x0118
+#define HDMI_AUDIO_CFG                  0x0200
+#define HDMI_SPDIF_FIFO_STATUS          0x0204
 #define HDMI_SW_DI_1_HEAD_WORD          0x0210
 #define HDMI_SW_DI_1_PKT_WORD0          0x0214
 #define HDMI_SW_DI_1_PKT_WORD1          0x0218
@@ -44,6 +48,9 @@
 #define HDMI_SW_DI_1_PKT_WORD5          0x0228
 #define HDMI_SW_DI_1_PKT_WORD6          0x022C
 #define HDMI_SW_DI_CFG                  0x0230
+#define HDMI_SAMPLE_FLAT_MASK           0x0244
+#define HDMI_AUDN                       0x0400
+#define HDMI_AUD_CTS                    0x0404
 #define HDMI_SW_DI_2_HEAD_WORD          0x0600
 #define HDMI_SW_DI_2_PKT_WORD0          0x0604
 #define HDMI_SW_DI_2_PKT_WORD1          0x0608
@@ -103,6 +110,7 @@
 #define HDMI_INT_DLL_LCK                BIT(5)
 #define HDMI_INT_NEW_FRAME              BIT(6)
 #define HDMI_INT_GENCTRL_PKT            BIT(7)
+#define HDMI_INT_AUDIO_FIFO_XRUN        BIT(8)
 #define HDMI_INT_SINK_TERM_PRESENT      BIT(11)
 
 #define HDMI_DEFAULT_INT (HDMI_INT_SINK_TERM_PRESENT \
@@ -111,6 +119,7 @@
 			| HDMI_INT_GLOBAL)
 
 #define HDMI_WORKING_INT (HDMI_INT_SINK_TERM_PRESENT \
+			| HDMI_INT_AUDIO_FIFO_XRUN \
 			| HDMI_INT_GENCTRL_PKT \
 			| HDMI_INT_NEW_FRAME \
 			| HDMI_INT_DLL_LCK \
@@ -121,6 +130,27 @@
 
 #define HDMI_STA_SW_RST                 BIT(1)
 
+#define HDMI_AUD_CFG_8CH		BIT(0)
+#define HDMI_AUD_CFG_SPDIF_DIV_2	BIT(1)
+#define HDMI_AUD_CFG_SPDIF_DIV_3	BIT(2)
+#define HDMI_AUD_CFG_SPDIF_CLK_DIV_4	(BIT(1) | BIT(2))
+#define HDMI_AUD_CFG_CTS_CLK_256FS	BIT(12)
+#define HDMI_AUD_CFG_DTS_INVALID	BIT(16)
+#define HDMI_AUD_CFG_ONE_BIT_INVALID	(BIT(18) | BIT(19) | BIT(20) |  BIT(21))
+#define HDMI_AUD_CFG_CH12_VALID	BIT(28)
+#define HDMI_AUD_CFG_CH34_VALID	BIT(29)
+#define HDMI_AUD_CFG_CH56_VALID	BIT(30)
+#define HDMI_AUD_CFG_CH78_VALID	BIT(31)
+
+/* sample flat mask */
+#define HDMI_SAMPLE_FLAT_NO	 0
+#define HDMI_SAMPLE_FLAT_SP0 BIT(0)
+#define HDMI_SAMPLE_FLAT_SP1 BIT(1)
+#define HDMI_SAMPLE_FLAT_SP2 BIT(2)
+#define HDMI_SAMPLE_FLAT_SP3 BIT(3)
+#define HDMI_SAMPLE_FLAT_ALL (HDMI_SAMPLE_FLAT_SP0 | HDMI_SAMPLE_FLAT_SP1 |\
+			      HDMI_SAMPLE_FLAT_SP2 | HDMI_SAMPLE_FLAT_SP3)
+
 #define HDMI_INFOFRAME_HEADER_TYPE(x)    (((x) & 0xff) <<  0)
 #define HDMI_INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) <<  8)
 #define HDMI_INFOFRAME_HEADER_LEN(x)     (((x) & 0x0f) << 16)
@@ -171,6 +201,10 @@ static irqreturn_t hdmi_irq_thread(int irq, void *arg)
 		wake_up_interruptible(&hdmi->wait_event);
 	}
 
+	/* Audio FIFO underrun IRQ */
+	if (hdmi->irq_status & HDMI_INT_AUDIO_FIFO_XRUN)
+		DRM_INFO("Warning: audio FIFO underrun occurs!");
+
 	return IRQ_HANDLED;
 }
 
@@ -441,26 +475,29 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
  */
 static int hdmi_audio_infoframe_config(struct sti_hdmi *hdmi)
 {
-	struct hdmi_audio_infoframe infofame;
+	struct hdmi_audio_params *audio = &hdmi->audio;
 	u8 buffer[HDMI_INFOFRAME_SIZE(AUDIO)];
-	int ret;
-
-	ret = hdmi_audio_infoframe_init(&infofame);
-	if (ret < 0) {
-		DRM_ERROR("failed to setup audio infoframe: %d\n", ret);
-		return ret;
-	}
-
-	infofame.channels = 2;
-
-	ret = hdmi_audio_infoframe_pack(&infofame, buffer, sizeof(buffer));
-	if (ret < 0) {
-		DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
-		return ret;
+	int ret, val;
+
+	DRM_DEBUG_DRIVER("enter %s, AIF %s\n", __func__,
+			 audio->enabled ? "enable" : "disable");
+	if (audio->enabled) {
+		/* set audio parameters stored*/
+		ret = hdmi_audio_infoframe_pack(&audio->cea, buffer,
+						sizeof(buffer));
+		if (ret < 0) {
+			DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
+			return ret;
+		}
+		hdmi_infoframe_write_infopack(hdmi, buffer, ret);
+	} else {
+		/*disable audio info frame transmission */
+		val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
+		val &= ~HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK,
+					     HDMI_IFRAME_SLOT_AUDIO);
+		hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
-
 	return 0;
 }
 
@@ -650,6 +687,10 @@ static int hdmi_dbg_show(struct seq_file *s, void *data)
 	DBGFS_DUMP("", HDMI_SW_DI_CFG);
 	hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG));
 
+	DBGFS_DUMP("\n", HDMI_AUDIO_CFG);
+	DBGFS_DUMP("\n", HDMI_SPDIF_FIFO_STATUS);
+	DBGFS_DUMP("\n", HDMI_AUDN);
+
 	seq_printf(s, "\n AVI Infoframe (Data Island slot N=%d):",
 		   HDMI_IFRAME_SLOT_AVI);
 	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AVI);
@@ -854,6 +895,7 @@ static int sti_hdmi_connector_get_modes(struct drm_connector *connector)
 
 	count = drm_add_edid_modes(connector, edid);
 	drm_mode_connector_update_edid_property(connector, edid);
+	drm_edid_to_eld(connector, edid);
 
 	kfree(edid);
 	return count;
@@ -1036,6 +1078,207 @@ static struct drm_encoder *sti_hdmi_find_encoder(struct drm_device *dev)
 	return NULL;
 }
 
+/**
+ * sti_hdmi_audio_get_non_coherent_n() - get N parameter for non-coherent
+ * clocks. None-coherent clocks means that audio and TMDS clocks have not the
+ * same source (drifts between clocks). In this case assumption is that CTS is
+ * automatically calculated by hardware.
+ *
+ * @audio_fs: audio frame clock frequency in Hz
+ *
+ * Values computed are based on table described in HDMI specification 1.4b
+ *
+ * Returns n value.
+ */
+static int sti_hdmi_audio_get_non_coherent_n(unsigned int audio_fs)
+{
+	unsigned int n;
+
+	switch (audio_fs) {
+	case 32000:
+		n = 4096;
+		break;
+	case 44100:
+		n = 6272;
+		break;
+	case 48000:
+		n = 6144;
+		break;
+	case 88200:
+		n = 6272 * 2;
+		break;
+	case 96000:
+		n = 6144 * 2;
+		break;
+	case 176400:
+		n = 6272 * 4;
+		break;
+	case 192000:
+		n = 6144 * 4;
+		break;
+	default:
+		/* Not pre-defined, recommended value: 128 * fs / 1000 */
+		n = (audio_fs * 128) / 1000;
+	}
+
+	return n;
+}
+
+static int hdmi_audio_configure(struct sti_hdmi *hdmi,
+				struct hdmi_audio_params *params)
+{
+	int audio_cfg, n;
+	struct hdmi_audio_infoframe *info = &params->cea;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	/* update N parameter */
+	n = sti_hdmi_audio_get_non_coherent_n(params->sample_rate);
+
+	DRM_DEBUG_DRIVER("Audio rate = %d Hz, TMDS clock = %d Hz, n = %d\n",
+			 params->sample_rate, hdmi->mode.clock * 1000, n);
+	hdmi_write(hdmi, n, HDMI_AUDN);
+
+	/* update HDMI registers according to configuration */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+
+	switch (info->channels) {
+	case 8:
+		audio_cfg |= HDMI_AUD_CFG_CH78_VALID;
+	case 6:
+		audio_cfg |= HDMI_AUD_CFG_CH56_VALID;
+	case 4:
+		audio_cfg |= HDMI_AUD_CFG_CH34_VALID | HDMI_AUD_CFG_8CH;
+	case 2:
+		audio_cfg |= HDMI_AUD_CFG_CH12_VALID;
+		break;
+	default:
+		DRM_ERROR("ERROR: Unsupported number of channels (%d)!\n",
+			  info->channels);
+		return -EINVAL;
+	}
+
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	hdmi->audio = *params;
+
+	return hdmi_audio_infoframe_config(hdmi);
+}
+
+static void hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int audio_cfg;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	/* disable audio */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	hdmi->audio.enabled = 0;
+	hdmi_audio_infoframe_config(hdmi);
+}
+
+static int hdmi_audio_hw_params(struct device *dev,
+				void *data,
+				struct hdmi_codec_daifmt *daifmt,
+				struct hdmi_codec_params *params)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int ret;
+	struct hdmi_audio_params audio = {
+		.sample_width = params->sample_width,
+		.sample_rate = params->sample_rate,
+		.cea = params->cea,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	if ((daifmt->fmt != HDMI_I2S) || daifmt->bit_clk_inv ||
+	    daifmt->frame_clk_inv || daifmt->bit_clk_master ||
+	    daifmt->frame_clk_master) {
+		dev_err(dev, "%s: Bad flags %d %d %d %d\n", __func__,
+			daifmt->bit_clk_inv, daifmt->frame_clk_inv,
+			daifmt->bit_clk_master,
+			daifmt->frame_clk_master);
+		return -EINVAL;
+	}
+
+	audio.enabled = 1;
+
+	ret = hdmi_audio_configure(hdmi, &audio);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+
+	DRM_DEBUG_DRIVER("%s\n", enable ? "enable" : "disable");
+
+	if (enable)
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_ALL, HDMI_SAMPLE_FLAT_MASK);
+	else
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_NO, HDMI_SAMPLE_FLAT_MASK);
+
+	return 0;
+}
+
+static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	struct drm_connector *connector = hdmi->drm_connector;
+
+	DRM_DEBUG_DRIVER("\n");
+	memcpy(buf, connector->eld, min(sizeof(connector->eld), len));
+
+	return 0;
+}
+
+static const struct hdmi_codec_ops audio_codec_ops = {
+	.hw_params = hdmi_audio_hw_params,
+	.audio_shutdown = hdmi_audio_shutdown,
+	.digital_mute = hdmi_audio_digital_mute,
+	.get_eld = hdmi_audio_get_eld,
+};
+
+static int sti_hdmi_register_audio_driver(struct device *dev,
+					  struct sti_hdmi *hdmi)
+{
+	struct hdmi_codec_pdata codec_data = {
+		.ops = &audio_codec_ops,
+		.max_i2s_channels = 8,
+		.i2s = 1,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	hdmi->audio.enabled = 0;
+
+	hdmi->audio_pdev = platform_device_register_data(
+		dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO,
+		&codec_data, sizeof(codec_data));
+
+	if (IS_ERR(hdmi->audio_pdev))
+		return PTR_ERR(hdmi->audio_pdev);
+
+	DRM_INFO("%s Driver bound %s\n", HDMI_CODEC_DRV_NAME, dev_name(dev));
+
+	return 0;
+}
+
 static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
@@ -1082,12 +1325,27 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	/* initialise property */
 	sti_hdmi_connector_init_property(drm_dev, drm_connector);
 
+	hdmi->drm_connector = drm_connector;
+
 	err = drm_mode_connector_attach_encoder(drm_connector, encoder);
 	if (err) {
 		DRM_ERROR("Failed to attach a connector to a encoder\n");
 		goto err_sysfs;
 	}
 
+	err = sti_hdmi_register_audio_driver(dev, hdmi);
+	if (err) {
+		DRM_ERROR("Failed to attach an audio codec\n");
+		goto err_sysfs;
+	}
+
+	/* Initialize audio infoframe */
+	err = hdmi_audio_infoframe_init(&hdmi->audio.cea);
+	if (err) {
+		DRM_ERROR("Failed to init audio infoframe\n");
+		goto err_sysfs;
+	}
+
 	/* Enable default interrupts */
 	hdmi_write(hdmi, HDMI_DEFAULT_INT, HDMI_INT_EN);
 
@@ -1095,6 +1353,7 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 
 err_sysfs:
 	drm_bridge_remove(bridge);
+	hdmi->drm_connector = NULL;
 	return -EINVAL;
 }
 
@@ -1244,6 +1503,8 @@ static int sti_hdmi_remove(struct platform_device *pdev)
 	struct sti_hdmi *hdmi = dev_get_drvdata(&pdev->dev);
 
 	i2c_put_adapter(hdmi->ddc_adapt);
+	if (hdmi->audio_pdev)
+		platform_device_unregister(hdmi->audio_pdev);
 	component_del(&pdev->dev, &sti_hdmi_ops);
 
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index ef3a94583bbd..119bc3582ac7 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -23,6 +23,13 @@ struct hdmi_phy_ops {
 	void (*stop)(struct sti_hdmi *hdmi);
 };
 
+struct hdmi_audio_params {
+	bool enabled;
+	unsigned int sample_width;
+	unsigned int sample_rate;
+	struct hdmi_audio_infoframe cea;
+};
+
 /* values for the framing mode property */
 enum sti_hdmi_modes {
 	HDMI_MODE_HDMI,
@@ -67,6 +74,9 @@ static const struct drm_prop_enum_list colorspace_mode_names[] = {
  * @ddc_adapt: i2c ddc adapter
  * @colorspace: current colorspace selected
  * @hdmi_mode: select framing for HDMI or DVI
+ * @audio_pdev: ASoC hdmi-codec platform device
+ * @audio: hdmi audio parameters.
+ * @drm_connector: hdmi connector
  */
 struct sti_hdmi {
 	struct device dev;
@@ -89,6 +99,9 @@ struct sti_hdmi {
 	struct i2c_adapter *ddc_adapt;
 	enum hdmi_colorspace colorspace;
 	enum sti_hdmi_modes hdmi_mode;
+	struct platform_device *audio_pdev;
+	struct hdmi_audio_params audio;
+	struct drm_connector *drm_connector;
 };
 
 u32 hdmi_read(struct sti_hdmi *hdmi, int offset);
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 33d2f42550cc..b03232247966 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -1363,6 +1363,7 @@ static int sti_hqvdp_probe(struct platform_device *pdev)
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 0);
 	if (vtg_np)
 		hqvdp->vtg = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	platform_set_drvdata(pdev, hqvdp);
 
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index 85cee9098439..0cf3335ef37c 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -45,25 +45,15 @@ const char *sti_plane_to_str(struct sti_plane *plane)
 
 #define STI_FPS_INTERVAL_MS     3000
 
-static int sti_plane_timespec_ms_diff(struct timespec lhs, struct timespec rhs)
-{
-	struct timespec tmp_ts = timespec_sub(lhs, rhs);
-	u64 tmp_ns = (u64)timespec_to_ns(&tmp_ts);
-
-	do_div(tmp_ns, NSEC_PER_MSEC);
-
-	return (u32)tmp_ns;
-}
-
 void sti_plane_update_fps(struct sti_plane *plane,
 			  bool new_frame,
 			  bool new_field)
 {
-	struct timespec now;
+	ktime_t now;
 	struct sti_fps_info *fps;
 	int fpks, fipks, ms_since_last, num_frames, num_fields;
 
-	getrawmonotonic(&now);
+	now = ktime_get();
 
 	/* Compute number of frame updates */
 	fps = &plane->fps_info;
@@ -76,7 +66,7 @@ void sti_plane_update_fps(struct sti_plane *plane,
 		return;
 
 	fps->curr_frame_counter++;
-	ms_since_last = sti_plane_timespec_ms_diff(now, fps->last_timestamp);
+	ms_since_last = ktime_to_ms(ktime_sub(now, fps->last_timestamp));
 	num_frames = fps->curr_frame_counter - fps->last_frame_counter;
 
 	if (num_frames <= 0  || ms_since_last < STI_FPS_INTERVAL_MS)
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index 39d39f5b7dd9..e0ea1dd3bb88 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -55,7 +55,7 @@ struct sti_fps_info {
 	unsigned int last_frame_counter;
 	unsigned int curr_field_counter;
 	unsigned int last_field_counter;
-	struct timespec last_timestamp;
+	ktime_t	     last_timestamp;
 	char fps_str[FPS_LENGTH];
 	char fips_str[FPS_LENGTH];
 };
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index 6bf4ce466d20..0bdc385eec17 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -65,7 +65,7 @@
 #define HDMI_DELAY          (5)
 
 /* Delay introduced by the DVO in nb of pixel */
-#define DVO_DELAY           (2)
+#define DVO_DELAY           (7)
 
 /* delay introduced by the Arbitrary Waveform Generator in nb of pixels */
 #define AWG_DELAY_HD        (-9)
@@ -432,6 +432,7 @@ static int vtg_probe(struct platform_device *pdev)
 	np = of_parse_phandle(pdev->dev.of_node, "st,slave", 0);
 	if (np) {
 		vtg->slave = of_vtg_find(np);
+		of_node_put(np);
 
 		if (!vtg->slave)
 			return -EPROBE_DEFER;
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 99510e64e91a..a4b357db8856 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -1,6 +1,6 @@
 config DRM_SUN4I
 	tristate "DRM Support for Allwinner A10 Display Engine"
-	depends on DRM && ARM
+	depends on DRM && ARM && COMMON_CLK
 	depends on ARCH_SUNXI || COMPILE_TEST
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index f7a15c1a93bf..3ab560450a82 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	/* Get the physical address of the buffer in memory */
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
+	DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr);
 
 	/* Compute the start of the displayed memory */
 	bpp = drm_format_plane_cpp(fb->pixel_format, 0);
@@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr += (state->src_x >> 16) * bpp;
 	paddr += (state->src_y >> 16) * fb->pitches[0];
 
-	DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);
+	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
 	/* Write the 32 lower bits of the address (in bits) */
 	lo_paddr = paddr << 3;
diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c
index f628b6d8f23f..4a192210574f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_crtc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c
@@ -77,6 +77,14 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc)
 	DRM_DEBUG_DRIVER("Disabling the CRTC\n");
 
 	sun4i_tcon_disable(drv->tcon);
+
+	if (crtc->state->event && !crtc->state->active) {
+		spin_lock_irq(&crtc->dev->event_lock);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+
+		crtc->state->event = NULL;
+	}
 }
 
 static void sun4i_crtc_enable(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 3ff668cb463c..5b3463197c48 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
-	return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate);
+	unsigned long best_parent = 0;
+	u8 best_div = 1;
+	int i;
+
+	for (i = 6; i < 127; i++) {
+		unsigned long ideal = rate * i;
+		unsigned long rounded;
+
+		rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
+					    ideal);
+
+		if (rounded == ideal) {
+			best_parent = rounded;
+			best_div = i;
+			goto out;
+		}
+
+		if ((rounded < ideal) && (rounded > best_parent)) {
+			best_parent = rounded;
+			best_div = i;
+		}
+	}
+
+out:
+	*parent_rate = best_parent;
+
+	return best_parent / best_div;
 }
 
 static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
-	int div = DIV_ROUND_CLOSEST(parent_rate, rate);
+	u8 div = parent_rate / rate;
 
 	return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG,
 				  GENMASK(6, 0), div);
@@ -127,10 +153,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	const char *clk_name, *parent_name;
 	struct clk_init_data init;
 	struct sun4i_dclk *dclk;
+	int ret;
 
 	parent_name = __clk_get_name(tcon->sclk0);
-	of_property_read_string_index(dev->of_node, "clock-output-names", 0,
-				      &clk_name);
+	ret = of_property_read_string_index(dev->of_node,
+					    "clock-output-names", 0,
+					    &clk_name);
+	if (ret)
+		return ret;
 
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
@@ -140,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	init.ops = &sun4i_dclk_ops;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
 
 	dclk->regmap = tcon->regs;
 	dclk->hw.init = &init;
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 9a67f927a53e..7092daaf6c43 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -92,11 +92,27 @@ static struct drm_driver sun4i_drv_driver = {
 	/* Frame Buffer Operations */
 
 	/* VBlank Operations */
-	.get_vblank_counter	= drm_vblank_count,
+	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= sun4i_drv_enable_vblank,
 	.disable_vblank		= sun4i_drv_disable_vblank,
 };
 
+static void sun4i_remove_framebuffers(void)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	/* The framebuffer can be located anywhere in RAM */
+	ap->ranges[0].base = 0;
+	ap->ranges[0].size = ~0;
+
+	remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false);
+	kfree(ap);
+}
+
 static int sun4i_drv_bind(struct device *dev)
 {
 	struct drm_device *drm;
@@ -140,6 +156,9 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->irq_enabled = true;
 
+	/* Remove early framebuffers (ie. simplefb) */
+	sun4i_remove_framebuffers();
+
 	/* Create our framebuffer */
 	drv->fbdev = sun4i_framebuffer_init(drm);
 	if (IS_ERR(drv->fbdev)) {
@@ -280,6 +299,7 @@ static int sun4i_drv_probe(struct platform_device *pdev)
 
 		count += sun4i_drv_add_endpoints(&pdev->dev, &match,
 						pipeline);
+		of_node_put(pipeline);
 
 		DRM_DEBUG_DRIVER("Queued %d outputs on pipeline %d\n",
 				 count, i);
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
index a0b30c216a5b..70688febd7ac 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
@@ -20,8 +20,7 @@ static void sun4i_de_output_poll_changed(struct drm_device *drm)
 {
 	struct sun4i_drv *drv = drm->dev_private;
 
-	if (drv->fbdev)
-		drm_fbdev_cma_hotplug_event(drv->fbdev);
+	drm_fbdev_cma_hotplug_event(drv->fbdev);
 }
 
 static const struct drm_mode_config_funcs sun4i_de_mode_config_funcs = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 442cfe271688..f5bbac6efb4c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
+	struct sun4i_drv *drv = rgb->drv;
+	struct sun4i_tcon *tcon = drv->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
+	unsigned long rate = mode->clock * 1000;
+	long rounded_rate;
 
 	DRM_DEBUG_DRIVER("Validating modes...\n");
 
@@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+	rounded_rate = clk_round_rate(tcon->dclk, rate);
+	if (rounded_rate < rate)
+		return MODE_CLOCK_LOW;
+
+	if (rounded_rate > rate)
+		return MODE_CLOCK_HIGH;
+
+	DRM_DEBUG_DRIVER("Clock rate OK\n");
+
 	return MODE_OK;
 }
 
@@ -193,7 +207,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 	int ret;
 
 	/* If we don't have a panel, there's no point in going on */
-	if (!tcon->panel)
+	if (IS_ERR(tcon->panel))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 9f19b0e08560..652385f09735 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
 
 	remote = of_graph_get_remote_port_parent(end_node);
 	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
+		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return of_drm_find_panel(remote);
+	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
 }
 
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
@@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return 0;
 	}
 
-	return sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm);
+	if (ret < 0)
+		goto err_free_clocks;
+
+	return 0;
 
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
@@ -522,12 +526,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	 * Defer the probe.
 	 */
 	panel = sun4i_tcon_find_panel(node);
-	if (IS_ERR(panel)) {
-		/*
-		 * If we don't have a panel endpoint, just go on
-		 */
-		if (PTR_ERR(panel) != -ENODEV)
-			return -EPROBE_DEFER;
+
+	/*
+	 * If we don't have a panel endpoint, just go on
+	 */
+	if (PTR_ERR(panel) == -EPROBE_DEFER) {
+		DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n");
+		return -EPROBE_DEFER;
 	}
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 39940f5b7c91..8495bd01b544 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -10,6 +10,7 @@
 #include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
 #include <soc/tegra/pmc.h>
@@ -1216,6 +1217,8 @@ static void tegra_crtc_disable(struct drm_crtc *crtc)
 
 	tegra_dc_stats_reset(&dc->stats);
 	drm_crtc_vblank_off(crtc);
+
+	pm_runtime_put_sync(dc->dev);
 }
 
 static void tegra_crtc_enable(struct drm_crtc *crtc)
@@ -1225,6 +1228,48 @@ static void tegra_crtc_enable(struct drm_crtc *crtc)
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
 
+	pm_runtime_get_sync(dc->dev);
+
+	/* initialize display controller */
+	if (dc->syncpt) {
+		u32 syncpt = host1x_syncpt_id(dc->syncpt);
+
+		value = SYNCPT_CNTRL_NO_STALL;
+		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
+
+		value = SYNCPT_VSYNC_ENABLE | syncpt;
+		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
+	}
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
+
+	/* initialize timer */
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
+		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
+
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
+		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+
+	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	if (dc->soc->supports_border_color)
+		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
+
+	/* apply PLL and pixel clock changes */
 	tegra_dc_commit_state(dc, state);
 
 	/* program display mode */
@@ -1685,7 +1730,6 @@ static int tegra_dc_init(struct host1x_client *client)
 	struct tegra_drm *tegra = drm->dev_private;
 	struct drm_plane *primary = NULL;
 	struct drm_plane *cursor = NULL;
-	u32 value;
 	int err;
 
 	dc->syncpt = host1x_syncpt_request(dc->dev, flags);
@@ -1755,47 +1799,6 @@ static int tegra_dc_init(struct host1x_client *client)
 		goto cleanup;
 	}
 
-	/* initialize display controller */
-	if (dc->syncpt) {
-		u32 syncpt = host1x_syncpt_id(dc->syncpt);
-
-		value = SYNCPT_CNTRL_NO_STALL;
-		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
-
-		value = SYNCPT_VSYNC_ENABLE | syncpt;
-		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
-	}
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
-
-	/* initialize timer */
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
-		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
-
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
-		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
-
-	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	if (dc->soc->supports_border_color)
-		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
-
-	tegra_dc_stats_reset(&dc->stats);
-
 	return 0;
 
 cleanup:
@@ -1987,33 +1990,15 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return PTR_ERR(dc->rst);
 	}
 
+	reset_control_assert(dc->rst);
+
 	if (dc->soc->has_powergate) {
 		if (dc->pipe == 0)
 			dc->powergate = TEGRA_POWERGATE_DIS;
 		else
 			dc->powergate = TEGRA_POWERGATE_DISB;
 
-		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
-							dc->rst);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to power partition: %d\n",
-				err);
-			return err;
-		}
-	} else {
-		err = clk_prepare_enable(dc->clk);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to enable clock: %d\n",
-				err);
-			return err;
-		}
-
-		err = reset_control_deassert(dc->rst);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to deassert reset: %d\n",
-				err);
-			return err;
-		}
+		tegra_powergate_power_off(dc->powergate);
 	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2027,16 +2012,19 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	INIT_LIST_HEAD(&dc->client.list);
-	dc->client.ops = &dc_client_ops;
-	dc->client.dev = &pdev->dev;
-
 	err = tegra_dc_rgb_probe(dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(&pdev->dev, "failed to probe RGB output: %d\n", err);
 		return err;
 	}
 
+	platform_set_drvdata(pdev, dc);
+	pm_runtime_enable(&pdev->dev);
+
+	INIT_LIST_HEAD(&dc->client.list);
+	dc->client.ops = &dc_client_ops;
+	dc->client.dev = &pdev->dev;
+
 	err = host1x_client_register(&dc->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
@@ -2044,8 +2032,6 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	platform_set_drvdata(pdev, dc);
-
 	return 0;
 }
 
@@ -2067,7 +2053,22 @@ static int tegra_dc_remove(struct platform_device *pdev)
 		return err;
 	}
 
-	reset_control_assert(dc->rst);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_dc_suspend(struct device *dev)
+{
+	struct tegra_dc *dc = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
 
 	if (dc->soc->has_powergate)
 		tegra_powergate_power_off(dc->powergate);
@@ -2077,10 +2078,45 @@ static int tegra_dc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int tegra_dc_resume(struct device *dev)
+{
+	struct tegra_dc *dc = dev_get_drvdata(dev);
+	int err;
+
+	if (dc->soc->has_powergate) {
+		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
+							dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to power partition: %d\n", err);
+			return err;
+		}
+	} else {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0) {
+			dev_err(dev, "failed to enable clock: %d\n", err);
+			return err;
+		}
+
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_dc_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dc_suspend, tegra_dc_resume, NULL)
+};
+
 struct platform_driver tegra_dc_driver = {
 	.driver = {
 		.name = "tegra-dc",
 		.of_match_table = tegra_dc_of_match,
+		.pm = &tegra_dc_pm_ops,
 	},
 	.probe = tegra_dc_probe,
 	.remove = tegra_dc_remove,
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index b24a0f14821a..059f409556d5 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -12,6 +12,9 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/regulator/consumer.h>
@@ -44,6 +47,11 @@ struct tegra_dpaux {
 	struct completion complete;
 	struct work_struct work;
 	struct list_head list;
+
+#ifdef CONFIG_GENERIC_PINCONF
+	struct pinctrl_dev *pinctrl;
+	struct pinctrl_desc desc;
+#endif
 };
 
 static inline struct tegra_dpaux *to_dpaux(struct drm_dp_aux *aux)
@@ -267,6 +275,148 @@ static irqreturn_t tegra_dpaux_irq(int irq, void *data)
 	return ret;
 }
 
+enum tegra_dpaux_functions {
+	DPAUX_PADCTL_FUNC_AUX,
+	DPAUX_PADCTL_FUNC_I2C,
+	DPAUX_PADCTL_FUNC_OFF,
+};
+
+static void tegra_dpaux_pad_power_down(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static void tegra_dpaux_pad_power_up(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
+{
+	u32 value;
+
+	switch (function) {
+	case DPAUX_PADCTL_FUNC_AUX:
+		value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_MODE_AUX;
+		break;
+
+	case DPAUX_PADCTL_FUNC_I2C:
+		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_MODE_I2C;
+		break;
+
+	case DPAUX_PADCTL_FUNC_OFF:
+		tegra_dpaux_pad_power_down(dpaux);
+		return 0;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
+	tegra_dpaux_pad_power_up(dpaux);
+
+	return 0;
+}
+
+#ifdef CONFIG_GENERIC_PINCONF
+static const struct pinctrl_pin_desc tegra_dpaux_pins[] = {
+	PINCTRL_PIN(0, "DP_AUX_CHx_P"),
+	PINCTRL_PIN(1, "DP_AUX_CHx_N"),
+};
+
+static const unsigned tegra_dpaux_pin_numbers[] = { 0, 1 };
+
+static const char * const tegra_dpaux_groups[] = {
+	"dpaux-io",
+};
+
+static const char * const tegra_dpaux_functions[] = {
+	"aux",
+	"i2c",
+	"off",
+};
+
+static int tegra_dpaux_get_groups_count(struct pinctrl_dev *pinctrl)
+{
+	return ARRAY_SIZE(tegra_dpaux_groups);
+}
+
+static const char *tegra_dpaux_get_group_name(struct pinctrl_dev *pinctrl,
+					      unsigned int group)
+{
+	return tegra_dpaux_groups[group];
+}
+
+static int tegra_dpaux_get_group_pins(struct pinctrl_dev *pinctrl,
+				      unsigned group, const unsigned **pins,
+				      unsigned *num_pins)
+{
+	*pins = tegra_dpaux_pin_numbers;
+	*num_pins = ARRAY_SIZE(tegra_dpaux_pin_numbers);
+
+	return 0;
+}
+
+static const struct pinctrl_ops tegra_dpaux_pinctrl_ops = {
+	.get_groups_count = tegra_dpaux_get_groups_count,
+	.get_group_name = tegra_dpaux_get_group_name,
+	.get_group_pins = tegra_dpaux_get_group_pins,
+	.dt_node_to_map = pinconf_generic_dt_node_to_map_group,
+	.dt_free_map = pinconf_generic_dt_free_map,
+};
+
+static int tegra_dpaux_get_functions_count(struct pinctrl_dev *pinctrl)
+{
+	return ARRAY_SIZE(tegra_dpaux_functions);
+}
+
+static const char *tegra_dpaux_get_function_name(struct pinctrl_dev *pinctrl,
+						 unsigned int function)
+{
+	return tegra_dpaux_functions[function];
+}
+
+static int tegra_dpaux_get_function_groups(struct pinctrl_dev *pinctrl,
+					   unsigned int function,
+					   const char * const **groups,
+					   unsigned * const num_groups)
+{
+	*num_groups = ARRAY_SIZE(tegra_dpaux_groups);
+	*groups = tegra_dpaux_groups;
+
+	return 0;
+}
+
+static int tegra_dpaux_set_mux(struct pinctrl_dev *pinctrl,
+			       unsigned int function, unsigned int group)
+{
+	struct tegra_dpaux *dpaux = pinctrl_dev_get_drvdata(pinctrl);
+
+	return tegra_dpaux_pad_config(dpaux, function);
+}
+
+static const struct pinmux_ops tegra_dpaux_pinmux_ops = {
+	.get_functions_count = tegra_dpaux_get_functions_count,
+	.get_function_name = tegra_dpaux_get_function_name,
+	.get_function_groups = tegra_dpaux_get_function_groups,
+	.set_mux = tegra_dpaux_set_mux,
+};
+#endif
+
 static int tegra_dpaux_probe(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux;
@@ -294,11 +444,14 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
-	if (IS_ERR(dpaux->rst)) {
-		dev_err(&pdev->dev, "failed to get reset control: %ld\n",
-			PTR_ERR(dpaux->rst));
-		return PTR_ERR(dpaux->rst);
+	if (!pdev->dev.pm_domain) {
+		dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
+		if (IS_ERR(dpaux->rst)) {
+			dev_err(&pdev->dev,
+				"failed to get reset control: %ld\n",
+				PTR_ERR(dpaux->rst));
+			return PTR_ERR(dpaux->rst);
+		}
 	}
 
 	dpaux->clk = devm_clk_get(&pdev->dev, NULL);
@@ -315,34 +468,37 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	reset_control_deassert(dpaux->rst);
+	if (dpaux->rst)
+		reset_control_deassert(dpaux->rst);
 
 	dpaux->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dpaux->clk_parent)) {
 		dev_err(&pdev->dev, "failed to get parent clock: %ld\n",
 			PTR_ERR(dpaux->clk_parent));
-		return PTR_ERR(dpaux->clk_parent);
+		err = PTR_ERR(dpaux->clk_parent);
+		goto assert_reset;
 	}
 
 	err = clk_prepare_enable(dpaux->clk_parent);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable parent clock: %d\n",
 			err);
-		return err;
+		goto assert_reset;
 	}
 
 	err = clk_set_rate(dpaux->clk_parent, 270000000);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to set clock to 270 MHz: %d\n",
 			err);
-		return err;
+		goto disable_parent_clk;
 	}
 
 	dpaux->vdd = devm_regulator_get(&pdev->dev, "vdd");
 	if (IS_ERR(dpaux->vdd)) {
 		dev_err(&pdev->dev, "failed to get VDD supply: %ld\n",
 			PTR_ERR(dpaux->vdd));
-		return PTR_ERR(dpaux->vdd);
+		err = PTR_ERR(dpaux->vdd);
+		goto disable_parent_clk;
 	}
 
 	err = devm_request_irq(dpaux->dev, dpaux->irq, tegra_dpaux_irq, 0,
@@ -350,7 +506,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n",
 			dpaux->irq, err);
-		return err;
+		goto disable_parent_clk;
 	}
 
 	disable_irq(dpaux->irq);
@@ -360,7 +516,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 
 	err = drm_dp_aux_register(&dpaux->aux);
 	if (err < 0)
-		return err;
+		goto disable_parent_clk;
 
 	/*
 	 * Assume that by default the DPAUX/I2C pads will be used for HDMI,
@@ -370,16 +526,24 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	 * is no possibility to perform the I2C mode configuration in the
 	 * HDMI path.
 	 */
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
-
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_PADCTL);
-	value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_MODE_I2C;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
+	err = tegra_dpaux_pad_config(dpaux, DPAUX_HYBRID_PADCTL_MODE_I2C);
+	if (err < 0)
+		return err;
 
+#ifdef CONFIG_GENERIC_PINCONF
+	dpaux->desc.name = dev_name(&pdev->dev);
+	dpaux->desc.pins = tegra_dpaux_pins;
+	dpaux->desc.npins = ARRAY_SIZE(tegra_dpaux_pins);
+	dpaux->desc.pctlops = &tegra_dpaux_pinctrl_ops;
+	dpaux->desc.pmxops = &tegra_dpaux_pinmux_ops;
+	dpaux->desc.owner = THIS_MODULE;
+
+	dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux);
+	if (!dpaux->pinctrl) {
+		dev_err(&pdev->dev, "failed to register pincontrol\n");
+		return -ENODEV;
+	}
+#endif
 	/* enable and clear all interrupts */
 	value = DPAUX_INTR_AUX_DONE | DPAUX_INTR_IRQ_EVENT |
 		DPAUX_INTR_UNPLUG_EVENT | DPAUX_INTR_PLUG_EVENT;
@@ -393,17 +557,24 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, dpaux);
 
 	return 0;
+
+disable_parent_clk:
+	clk_disable_unprepare(dpaux->clk_parent);
+assert_reset:
+	if (dpaux->rst)
+		reset_control_assert(dpaux->rst);
+
+	clk_disable_unprepare(dpaux->clk);
+
+	return err;
 }
 
 static int tegra_dpaux_remove(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
-	u32 value;
 
 	/* make sure pads are powered down when not in use */
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+	tegra_dpaux_pad_power_down(dpaux);
 
 	drm_dp_aux_unregister(&dpaux->aux);
 
@@ -414,7 +585,10 @@ static int tegra_dpaux_remove(struct platform_device *pdev)
 	cancel_work_sync(&dpaux->work);
 
 	clk_disable_unprepare(dpaux->clk_parent);
-	reset_control_assert(dpaux->rst);
+
+	if (dpaux->rst)
+		reset_control_assert(dpaux->rst);
+
 	clk_disable_unprepare(dpaux->clk);
 
 	return 0;
@@ -528,30 +702,15 @@ enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux)
 int drm_dp_aux_enable(struct drm_dp_aux *aux)
 {
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
-	u32 value;
-
-	value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-		DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-		DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
-		DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_MODE_AUX;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
-
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
 
-	return 0;
+	return tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_AUX);
 }
 
 int drm_dp_aux_disable(struct drm_dp_aux *aux)
 {
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
-	u32 value;
 
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+	tegra_dpaux_pad_power_down(dpaux);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index a177a42a9849..755264d9db22 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -56,8 +56,8 @@ static void tegra_atomic_complete(struct tegra_drm *tegra,
 	 */
 
 	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_planes(drm, state, false);
 	drm_atomic_helper_commit_modeset_enables(drm, state);
+	drm_atomic_helper_commit_planes(drm, state, true);
 
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 099cccb2fbcb..3d228ad90e0f 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -13,6 +13,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
 #include <linux/regulator/consumer.h>
@@ -677,6 +678,45 @@ static void tegra_dsi_ganged_disable(struct tegra_dsi *dsi)
 	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_CONTROL);
 }
 
+static int tegra_dsi_pad_enable(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	value = DSI_PAD_CONTROL_VS1_PULLDN(0) | DSI_PAD_CONTROL_VS1_PDIO(0);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_0);
+
+	return 0;
+}
+
+static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	/*
+	 * XXX Is this still needed? The module reset is deasserted right
+	 * before this function is called.
+	 */
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_2);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_3);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_4);
+
+	/* start calibration */
+	tegra_dsi_pad_enable(dsi);
+
+	value = DSI_PAD_SLEW_UP(0x7) | DSI_PAD_SLEW_DN(0x7) |
+		DSI_PAD_LP_UP(0x1) | DSI_PAD_LP_DN(0x1) |
+		DSI_PAD_OUT_CLK(0x0);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_2);
+
+	value = DSI_PAD_PREEMP_PD_CLK(0x3) | DSI_PAD_PREEMP_PU_CLK(0x3) |
+		DSI_PAD_PREEMP_PD(0x03) | DSI_PAD_PREEMP_PU(0x3);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_3);
+
+	return tegra_mipi_calibrate(dsi->mipi);
+}
+
 static void tegra_dsi_set_timeout(struct tegra_dsi *dsi, unsigned long bclk,
 				  unsigned int vrefresh)
 {
@@ -836,7 +876,7 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 
 	tegra_dsi_disable(dsi);
 
-	return;
+	pm_runtime_put(dsi->dev);
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -847,6 +887,13 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 	struct tegra_dsi *dsi = to_dsi(output);
 	struct tegra_dsi_state *state;
 	u32 value;
+	int err;
+
+	pm_runtime_get_sync(dsi->dev);
+
+	err = tegra_dsi_pad_calibrate(dsi);
+	if (err < 0)
+		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
 
 	state = tegra_dsi_get_state(dsi);
 
@@ -875,8 +922,6 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 
 	if (output->panel)
 		drm_panel_enable(output->panel);
-
-	return;
 }
 
 static int
@@ -966,55 +1011,12 @@ static const struct drm_encoder_helper_funcs tegra_dsi_encoder_helper_funcs = {
 	.atomic_check = tegra_dsi_encoder_atomic_check,
 };
 
-static int tegra_dsi_pad_enable(struct tegra_dsi *dsi)
-{
-	u32 value;
-
-	value = DSI_PAD_CONTROL_VS1_PULLDN(0) | DSI_PAD_CONTROL_VS1_PDIO(0);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_0);
-
-	return 0;
-}
-
-static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
-{
-	u32 value;
-
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_2);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_3);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_4);
-
-	/* start calibration */
-	tegra_dsi_pad_enable(dsi);
-
-	value = DSI_PAD_SLEW_UP(0x7) | DSI_PAD_SLEW_DN(0x7) |
-		DSI_PAD_LP_UP(0x1) | DSI_PAD_LP_DN(0x1) |
-		DSI_PAD_OUT_CLK(0x0);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_2);
-
-	value = DSI_PAD_PREEMP_PD_CLK(0x3) | DSI_PAD_PREEMP_PU_CLK(0x3) |
-		DSI_PAD_PREEMP_PD(0x03) | DSI_PAD_PREEMP_PU(0x3);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_3);
-
-	return tegra_mipi_calibrate(dsi->mipi);
-}
-
 static int tegra_dsi_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 	int err;
 
-	reset_control_deassert(dsi->rst);
-
-	err = tegra_dsi_pad_calibrate(dsi);
-	if (err < 0) {
-		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
-		goto reset;
-	}
-
 	/* Gangsters must not register their own outputs. */
 	if (!dsi->master) {
 		dsi->output.dev = client->dev;
@@ -1037,12 +1039,9 @@ static int tegra_dsi_init(struct host1x_client *client)
 		drm_connector_register(&dsi->output.connector);
 
 		err = tegra_output_init(drm, &dsi->output);
-		if (err < 0) {
-			dev_err(client->dev,
-				"failed to initialize output: %d\n",
+		if (err < 0)
+			dev_err(dsi->dev, "failed to initialize output: %d\n",
 				err);
-			goto reset;
-		}
 
 		dsi->output.encoder.possible_crtcs = 0x3;
 	}
@@ -1054,10 +1053,6 @@ static int tegra_dsi_init(struct host1x_client *client)
 	}
 
 	return 0;
-
-reset:
-	reset_control_assert(dsi->rst);
-	return err;
 }
 
 static int tegra_dsi_exit(struct host1x_client *client)
@@ -1069,7 +1064,7 @@ static int tegra_dsi_exit(struct host1x_client *client)
 	if (IS_ENABLED(CONFIG_DEBUG_FS))
 		tegra_dsi_debugfs_exit(dsi);
 
-	reset_control_assert(dsi->rst);
+	regulator_disable(dsi->vdd);
 
 	return 0;
 }
@@ -1493,74 +1488,50 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 	dsi->format = MIPI_DSI_FMT_RGB888;
 	dsi->lanes = 4;
 
-	dsi->rst = devm_reset_control_get(&pdev->dev, "dsi");
-	if (IS_ERR(dsi->rst))
-		return PTR_ERR(dsi->rst);
+	if (!pdev->dev.pm_domain) {
+		dsi->rst = devm_reset_control_get(&pdev->dev, "dsi");
+		if (IS_ERR(dsi->rst))
+			return PTR_ERR(dsi->rst);
+	}
 
 	dsi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dsi->clk)) {
 		dev_err(&pdev->dev, "cannot get DSI clock\n");
-		err = PTR_ERR(dsi->clk);
-		goto reset;
-	}
-
-	err = clk_prepare_enable(dsi->clk);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable DSI clock\n");
-		goto reset;
+		return PTR_ERR(dsi->clk);
 	}
 
 	dsi->clk_lp = devm_clk_get(&pdev->dev, "lp");
 	if (IS_ERR(dsi->clk_lp)) {
 		dev_err(&pdev->dev, "cannot get low-power clock\n");
-		err = PTR_ERR(dsi->clk_lp);
-		goto disable_clk;
-	}
-
-	err = clk_prepare_enable(dsi->clk_lp);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable low-power clock\n");
-		goto disable_clk;
+		return PTR_ERR(dsi->clk_lp);
 	}
 
 	dsi->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dsi->clk_parent)) {
 		dev_err(&pdev->dev, "cannot get parent clock\n");
-		err = PTR_ERR(dsi->clk_parent);
-		goto disable_clk_lp;
+		return PTR_ERR(dsi->clk_parent);
 	}
 
 	dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi");
 	if (IS_ERR(dsi->vdd)) {
 		dev_err(&pdev->dev, "cannot get VDD supply\n");
-		err = PTR_ERR(dsi->vdd);
-		goto disable_clk_lp;
-	}
-
-	err = regulator_enable(dsi->vdd);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable VDD supply\n");
-		goto disable_clk_lp;
+		return PTR_ERR(dsi->vdd);
 	}
 
 	err = tegra_dsi_setup_clocks(dsi);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot setup clocks\n");
-		goto disable_vdd;
+		return err;
 	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(dsi->regs)) {
-		err = PTR_ERR(dsi->regs);
-		goto disable_vdd;
-	}
+	if (IS_ERR(dsi->regs))
+		return PTR_ERR(dsi->regs);
 
 	dsi->mipi = tegra_mipi_request(&pdev->dev);
-	if (IS_ERR(dsi->mipi)) {
-		err = PTR_ERR(dsi->mipi);
-		goto disable_vdd;
-	}
+	if (IS_ERR(dsi->mipi))
+		return PTR_ERR(dsi->mipi);
 
 	dsi->host.ops = &tegra_dsi_host_ops;
 	dsi->host.dev = &pdev->dev;
@@ -1571,6 +1542,9 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 		goto mipi_free;
 	}
 
+	platform_set_drvdata(pdev, dsi);
+	pm_runtime_enable(&pdev->dev);
+
 	INIT_LIST_HEAD(&dsi->client.list);
 	dsi->client.ops = &dsi_client_ops;
 	dsi->client.dev = &pdev->dev;
@@ -1582,22 +1556,12 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 		goto unregister;
 	}
 
-	platform_set_drvdata(pdev, dsi);
-
 	return 0;
 
 unregister:
 	mipi_dsi_host_unregister(&dsi->host);
 mipi_free:
 	tegra_mipi_free(dsi->mipi);
-disable_vdd:
-	regulator_disable(dsi->vdd);
-disable_clk_lp:
-	clk_disable_unprepare(dsi->clk_lp);
-disable_clk:
-	clk_disable_unprepare(dsi->clk);
-reset:
-	reset_control_assert(dsi->rst);
 	return err;
 }
 
@@ -1606,6 +1570,8 @@ static int tegra_dsi_remove(struct platform_device *pdev)
 	struct tegra_dsi *dsi = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&dsi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -1618,14 +1584,82 @@ static int tegra_dsi_remove(struct platform_device *pdev)
 	mipi_dsi_host_unregister(&dsi->host);
 	tegra_mipi_free(dsi->mipi);
 
-	regulator_disable(dsi->vdd);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_dsi_suspend(struct device *dev)
+{
+	struct tegra_dsi *dsi = dev_get_drvdata(dev);
+	int err;
+
+	if (dsi->rst) {
+		err = reset_control_assert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
 	clk_disable_unprepare(dsi->clk_lp);
 	clk_disable_unprepare(dsi->clk);
-	reset_control_assert(dsi->rst);
+
+	regulator_disable(dsi->vdd);
 
 	return 0;
 }
 
+static int tegra_dsi_resume(struct device *dev)
+{
+	struct tegra_dsi *dsi = dev_get_drvdata(dev);
+	int err;
+
+	err = regulator_enable(dsi->vdd);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to enable VDD supply: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(dsi->clk);
+	if (err < 0) {
+		dev_err(dev, "cannot enable DSI clock: %d\n", err);
+		goto disable_vdd;
+	}
+
+	err = clk_prepare_enable(dsi->clk_lp);
+	if (err < 0) {
+		dev_err(dev, "cannot enable low-power clock: %d\n", err);
+		goto disable_clk;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (dsi->rst) {
+		err = reset_control_deassert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "cannot assert reset: %d\n", err);
+			goto disable_clk_lp;
+		}
+	}
+
+	return 0;
+
+disable_clk_lp:
+	clk_disable_unprepare(dsi->clk_lp);
+disable_clk:
+	clk_disable_unprepare(dsi->clk);
+disable_vdd:
+	regulator_disable(dsi->vdd);
+	return err;
+}
+#endif
+
+static const struct dev_pm_ops tegra_dsi_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dsi_suspend, tegra_dsi_resume, NULL)
+};
+
 static const struct of_device_id tegra_dsi_of_match[] = {
 	{ .compatible = "nvidia,tegra210-dsi", },
 	{ .compatible = "nvidia,tegra132-dsi", },
@@ -1639,6 +1673,7 @@ struct platform_driver tegra_dsi_driver = {
 	.driver = {
 		.name = "tegra-dsi",
 		.of_match_table = tegra_dsi_of_match,
+		.pm = &tegra_dsi_pm_ops,
 	},
 	.probe = tegra_dsi_probe,
 	.remove = tegra_dsi_remove,
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 1b12aa7a715e..e6d71fa4028e 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -68,7 +68,7 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 		struct tegra_bo *bo = fb->planes[i];
 
 		if (bo) {
-			if (bo->pages && bo->vaddr)
+			if (bo->pages)
 				vunmap(bo->vaddr);
 
 			drm_gem_object_unreference_unlocked(&bo->gem);
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 2fdb8796443e..cda0491ed6bf 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <linux/gpio.h>
 #include <linux/hdmi.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
@@ -18,10 +19,14 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 
+#include <sound/hda_verbs.h>
+
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
 
+#define HDMI_ELD_BUFFER_SIZE 96
+
 struct tmds_config {
 	unsigned int pclk;
 	u32 pll0;
@@ -39,6 +44,8 @@ struct tegra_hdmi_config {
 	u32 fuse_override_value;
 
 	bool has_sor_io_peak_current;
+	bool has_hda;
+	bool has_hbr;
 };
 
 struct tegra_hdmi {
@@ -60,7 +67,10 @@ struct tegra_hdmi {
 	const struct tegra_hdmi_config *config;
 
 	unsigned int audio_source;
-	unsigned int audio_freq;
+	unsigned int audio_sample_rate;
+	unsigned int audio_channels;
+
+	unsigned int pixel_clock;
 	bool stereo;
 	bool dvi;
 
@@ -402,11 +412,11 @@ static const struct tmds_config tegra124_tmds_config[] = {
 };
 
 static const struct tegra_hdmi_audio_config *
-tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pclk)
+tegra_hdmi_get_audio_config(unsigned int sample_rate, unsigned int pclk)
 {
 	const struct tegra_hdmi_audio_config *table;
 
-	switch (audio_freq) {
+	switch (sample_rate) {
 	case 32000:
 		table = tegra_hdmi_audio_32k;
 		break;
@@ -476,44 +486,114 @@ static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi)
 	}
 }
 
-static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
+static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value)
+{
+	static const struct {
+		unsigned int sample_rate;
+		unsigned int offset;
+	} regs[] = {
+		{  32000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320 },
+		{  44100, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441 },
+		{  48000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480 },
+		{  88200, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882 },
+		{  96000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960 },
+		{ 176400, HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764 },
+		{ 192000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920 },
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(regs); i++) {
+		if (regs[i].sample_rate == hdmi->audio_sample_rate) {
+			tegra_hdmi_writel(hdmi, value, regs[i].offset);
+			break;
+		}
+	}
+}
+
+static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 {
-	struct device_node *node = hdmi->dev->of_node;
 	const struct tegra_hdmi_audio_config *config;
-	unsigned int offset = 0;
-	u32 value;
+	u32 source, value;
 
 	switch (hdmi->audio_source) {
 	case HDA:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
+		else
+			return -EINVAL;
+
 		break;
 
 	case SPDIF:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
+		else
+			source = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
 		break;
 
 	default:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
+		else
+			source = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
 		break;
 	}
 
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		value |= AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			 AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
-	} else {
-		value |= AUDIO_CNTRL0_INJECT_NULLSMPL;
+	/*
+	 * Tegra30 and later use a slightly modified version of the register
+	 * layout to accomodate for changes related to supporting HDA as the
+	 * audio input source for HDMI. The source select field has moved to
+	 * the SOR_AUDIO_CNTRL0 register, but the error tolerance and frames
+	 * per block fields remain in the AUDIO_CNTRL0 register.
+	 */
+	if (hdmi->config->has_hda) {
+		/*
+		 * Inject null samples into the audio FIFO for every frame in
+		 * which the codec did not receive any samples. This applies
+		 * to stereo LPCM only.
+		 *
+		 * XXX: This seems to be a remnant of MCP days when this was
+		 * used to work around issues with monitors not being able to
+		 * play back system startup sounds early. It is possibly not
+		 * needed on Linux at all.
+		 */
+		if (hdmi->audio_channels == 2)
+			value = SOR_AUDIO_CNTRL0_INJECT_NULLSMPL;
+		else
+			value = 0;
+
+		value |= source;
+
 		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+	}
 
-		value = AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+	/*
+	 * On Tegra20, HDA is not a supported audio source and the source
+	 * select field is part of the AUDIO_CNTRL0 register.
+	 */
+	value = AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0) |
+		AUDIO_CNTRL0_ERROR_TOLERANCE(6);
+
+	if (!hdmi->config->has_hda)
+		value |= source;
+
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+
+	/*
+	 * Advertise support for High Bit-Rate on Tegra114 and later.
+	 */
+	if (hdmi->config->has_hbr) {
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
+		value |= SOR_AUDIO_SPARE0_HBR_ENABLE;
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
 	}
 
-	config = tegra_hdmi_get_audio_config(hdmi->audio_freq, pclk);
+	config = tegra_hdmi_get_audio_config(hdmi->audio_sample_rate,
+					     hdmi->pixel_clock);
 	if (!config) {
-		dev_err(hdmi->dev, "cannot set audio to %u at %u pclk\n",
-			hdmi->audio_freq, pclk);
+		dev_err(hdmi->dev,
+			"cannot set audio to %u Hz at %u Hz pixel clock\n",
+			hdmi->audio_sample_rate, hdmi->pixel_clock);
 		return -EINVAL;
 	}
 
@@ -526,8 +606,8 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
 	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE,
 			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
 
-	value = ACR_SUBPACK_CTS(config->cts);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
+	tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config->cts),
+			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
 
 	value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_SPARE);
@@ -536,43 +616,53 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
 	value &= ~AUDIO_N_RESETF;
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
 
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		switch (hdmi->audio_freq) {
-		case 32000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320;
-			break;
+	if (hdmi->config->has_hda)
+		tegra_hdmi_write_aval(hdmi, config->aval);
 
-		case 44100:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441;
-			break;
+	tegra_hdmi_setup_audio_fs_tables(hdmi);
 
-		case 48000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480;
-			break;
+	return 0;
+}
 
-		case 88200:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882;
-			break;
+static void tegra_hdmi_disable_audio(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-		case 96000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960;
-			break;
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value &= ~GENERIC_CTRL_AUDIO;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
 
-		case 176400:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764;
-			break;
+static void tegra_hdmi_enable_audio(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-		case 192000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920;
-			break;
-		}
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value |= GENERIC_CTRL_AUDIO;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
 
-		tegra_hdmi_writel(hdmi, config->aval, offset);
-	}
+static void tegra_hdmi_write_eld(struct tegra_hdmi *hdmi)
+{
+	size_t length = drm_eld_size(hdmi->output.connector.eld), i;
+	u32 value;
 
-	tegra_hdmi_setup_audio_fs_tables(hdmi);
+	for (i = 0; i < length; i++)
+		tegra_hdmi_writel(hdmi, i << 8 | hdmi->output.connector.eld[i],
+				  HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
 
-	return 0;
+	/*
+	 * The HDA codec will always report an ELD buffer size of 96 bytes and
+	 * the HDA codec driver will check that each byte read from the buffer
+	 * is valid. Therefore every byte must be written, even if no 96 bytes
+	 * were parsed from EDID.
+	 */
+	for (i = length; i < HDMI_ELD_BUFFER_SIZE; i++)
+		tegra_hdmi_writel(hdmi, i << 8 | 0,
+				  HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
+
+	value = SOR_AUDIO_HDA_PRESENSE_VALID | SOR_AUDIO_HDA_PRESENSE_PRESENT;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
 }
 
 static inline u32 tegra_hdmi_subpack(const u8 *ptr, size_t size)
@@ -644,12 +734,6 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 	u8 buffer[17];
 	ssize_t err;
 
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-		return;
-	}
-
 	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to setup AVI infoframe: %zd\n", err);
@@ -663,9 +747,24 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 	}
 
 	tegra_hdmi_write_infopack(hdmi, buffer, err);
+}
+
+static void tegra_hdmi_disable_avi_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value &= ~INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_enable_avi_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value |= INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
 }
 
 static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
@@ -674,12 +773,6 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 	u8 buffer[14];
 	ssize_t err;
 
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-		return;
-	}
-
 	err = hdmi_audio_infoframe_init(&frame);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to setup audio infoframe: %zd\n",
@@ -687,7 +780,7 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 		return;
 	}
 
-	frame.channels = 2;
+	frame.channels = hdmi->audio_channels;
 
 	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
 	if (err < 0) {
@@ -703,9 +796,24 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 	 * bytes can be programmed.
 	 */
 	tegra_hdmi_write_infopack(hdmi, buffer, min_t(size_t, 10, err));
+}
 
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+static void tegra_hdmi_disable_audio_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+	value &= ~INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_enable_audio_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+	value |= INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
 }
 
 static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
@@ -713,14 +821,6 @@ static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
 	struct hdmi_vendor_infoframe frame;
 	u8 buffer[10];
 	ssize_t err;
-	u32 value;
-
-	if (!hdmi->stereo) {
-		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		value &= ~GENERIC_CTRL_ENABLE;
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		return;
-	}
 
 	hdmi_vendor_infoframe_init(&frame);
 	frame.s3d_struct = HDMI_3D_STRUCTURE_FRAME_PACKING;
@@ -733,6 +833,20 @@ static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
 	}
 
 	tegra_hdmi_write_infopack(hdmi, buffer, err);
+}
+
+static void tegra_hdmi_disable_stereo_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value &= ~GENERIC_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
+
+static void tegra_hdmi_enable_stereo_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
 	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
 	value |= GENERIC_CTRL_ENABLE;
@@ -772,10 +886,25 @@ static bool tegra_output_is_hdmi(struct tegra_output *output)
 	return drm_detect_hdmi_monitor(edid);
 }
 
+static enum drm_connector_status
+tegra_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	enum drm_connector_status status;
+
+	status = tegra_output_connector_detect(connector, force);
+	if (status == connector_status_connected)
+		return status;
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
+	return status;
+}
+
 static const struct drm_connector_funcs tegra_hdmi_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.reset = drm_atomic_helper_connector_reset,
-	.detect = tegra_output_connector_detect,
+	.detect = tegra_hdmi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = tegra_output_connector_destroy,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
@@ -814,7 +943,9 @@ static const struct drm_encoder_funcs tegra_hdmi_encoder_funcs = {
 
 static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 {
+	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_hdmi *hdmi = to_hdmi(output);
 	u32 value;
 
 	/*
@@ -828,6 +959,20 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 
 		tegra_dc_commit(dc);
 	}
+
+	if (!hdmi->dvi) {
+		if (hdmi->stereo)
+			tegra_hdmi_disable_stereo_infoframe(hdmi);
+
+		tegra_hdmi_disable_audio_infoframe(hdmi);
+		tegra_hdmi_disable_avi_infoframe(hdmi);
+		tegra_hdmi_disable_audio(hdmi);
+	}
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE);
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK);
+
+	pm_runtime_put(hdmi->dev);
 }
 
 static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
@@ -836,21 +981,28 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	unsigned int h_sync_width, h_front_porch, h_back_porch, i, rekey;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct device_node *node = output->dev->of_node;
 	struct tegra_hdmi *hdmi = to_hdmi(output);
-	unsigned int pulse_start, div82, pclk;
+	unsigned int pulse_start, div82;
 	int retries = 1000;
 	u32 value;
 	int err;
 
-	hdmi->dvi = !tegra_output_is_hdmi(output);
+	pm_runtime_get_sync(hdmi->dev);
 
-	pclk = mode->clock * 1000;
+	/*
+	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
+	 * is used for interoperability between the HDA codec driver and the
+	 * HDMI driver.
+	 */
+	tegra_hdmi_writel(hdmi, INT_CODEC_SCRATCH0, HDMI_NV_PDISP_INT_ENABLE);
+	tegra_hdmi_writel(hdmi, INT_CODEC_SCRATCH0, HDMI_NV_PDISP_INT_MASK);
+
+	hdmi->pixel_clock = mode->clock * 1000;
 	h_sync_width = mode->hsync_end - mode->hsync_start;
 	h_back_porch = mode->htotal - mode->hsync_end;
 	h_front_porch = mode->hsync_start - mode->hdisplay;
 
-	err = clk_set_rate(hdmi->clk, pclk);
+	err = clk_set_rate(hdmi->clk, hdmi->pixel_clock);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n",
 			err);
@@ -909,17 +1061,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	value = SOR_REFCLK_DIV_INT(div82 >> 2) | SOR_REFCLK_DIV_FRAC(div82);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_REFCLK);
 
+	hdmi->dvi = !tegra_output_is_hdmi(output);
 	if (!hdmi->dvi) {
-		err = tegra_hdmi_setup_audio(hdmi, pclk);
+		err = tegra_hdmi_setup_audio(hdmi);
 		if (err < 0)
 			hdmi->dvi = true;
 	}
 
-	if (of_device_is_compatible(node, "nvidia,tegra20-hdmi")) {
-		/*
-		 * TODO: add ELD support
-		 */
-	}
+	if (hdmi->config->has_hda)
+		tegra_hdmi_write_eld(hdmi);
 
 	rekey = HDMI_REKEY_DEFAULT;
 	value = HDMI_CTRL_REKEY(rekey);
@@ -931,20 +1081,17 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_CTRL);
 
-	if (hdmi->dvi)
-		tegra_hdmi_writel(hdmi, 0x0,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	else
-		tegra_hdmi_writel(hdmi, GENERIC_CTRL_AUDIO,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	if (!hdmi->dvi) {
+		tegra_hdmi_setup_avi_infoframe(hdmi, mode);
+		tegra_hdmi_setup_audio_infoframe(hdmi);
 
-	tegra_hdmi_setup_avi_infoframe(hdmi, mode);
-	tegra_hdmi_setup_audio_infoframe(hdmi);
-	tegra_hdmi_setup_stereo_infoframe(hdmi);
+		if (hdmi->stereo)
+			tegra_hdmi_setup_stereo_infoframe(hdmi);
+	}
 
 	/* TMDS CONFIG */
 	for (i = 0; i < hdmi->config->num_tmds; i++) {
-		if (pclk <= hdmi->config->tmds[i].pclk) {
+		if (hdmi->pixel_clock <= hdmi->config->tmds[i].pclk) {
 			tegra_hdmi_setup_tmds(hdmi, &hdmi->config->tmds[i]);
 			break;
 		}
@@ -1031,6 +1178,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 
 	tegra_dc_commit(dc);
 
+	if (!hdmi->dvi) {
+		tegra_hdmi_enable_avi_infoframe(hdmi);
+		tegra_hdmi_enable_audio_infoframe(hdmi);
+		tegra_hdmi_enable_audio(hdmi);
+
+		if (hdmi->stereo)
+			tegra_hdmi_enable_stereo_infoframe(hdmi);
+	}
+
 	/* TODO: add HDCP support */
 }
 
@@ -1235,8 +1391,14 @@ static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
 	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG);
 	DUMP_REG(HDMI_NV_PDISP_KEY_SKEY_INDEX);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
+	DUMP_REG(HDMI_NV_PDISP_INT_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_INT_MASK);
+	DUMP_REG(HDMI_NV_PDISP_INT_ENABLE);
 	DUMP_REG(HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT);
 
 #undef DUMP_REG
@@ -1360,14 +1522,6 @@ static int tegra_hdmi_init(struct host1x_client *client)
 		return err;
 	}
 
-	err = clk_prepare_enable(hdmi->clk);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to enable clock: %d\n", err);
-		return err;
-	}
-
-	reset_control_deassert(hdmi->rst);
-
 	return 0;
 }
 
@@ -1377,9 +1531,6 @@ static int tegra_hdmi_exit(struct host1x_client *client)
 
 	tegra_output_exit(&hdmi->output);
 
-	reset_control_assert(hdmi->rst);
-	clk_disable_unprepare(hdmi->clk);
-
 	regulator_disable(hdmi->vdd);
 	regulator_disable(hdmi->pll);
 	regulator_disable(hdmi->hdmi);
@@ -1401,6 +1552,8 @@ static const struct tegra_hdmi_config tegra20_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = false,
+	.has_hda = false,
+	.has_hbr = false,
 };
 
 static const struct tegra_hdmi_config tegra30_hdmi_config = {
@@ -1409,6 +1562,8 @@ static const struct tegra_hdmi_config tegra30_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = false,
+	.has_hda = true,
+	.has_hbr = false,
 };
 
 static const struct tegra_hdmi_config tegra114_hdmi_config = {
@@ -1417,6 +1572,8 @@ static const struct tegra_hdmi_config tegra114_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_PAD_CTLS0,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = true,
+	.has_hda = true,
+	.has_hbr = true,
 };
 
 static const struct tegra_hdmi_config tegra124_hdmi_config = {
@@ -1425,6 +1582,8 @@ static const struct tegra_hdmi_config tegra124_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_PAD_CTLS0,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = true,
+	.has_hda = true,
+	.has_hbr = true,
 };
 
 static const struct of_device_id tegra_hdmi_of_match[] = {
@@ -1436,6 +1595,67 @@ static const struct of_device_id tegra_hdmi_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_hdmi_of_match);
 
+static void hda_format_parse(unsigned int format, unsigned int *rate,
+			     unsigned int *channels)
+{
+	unsigned int mul, div;
+
+	if (format & AC_FMT_BASE_44K)
+		*rate = 44100;
+	else
+		*rate = 48000;
+
+	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
+	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
+
+	*rate = *rate * (mul + 1) / (div + 1);
+
+	*channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
+}
+
+static irqreturn_t tegra_hdmi_irq(int irq, void *data)
+{
+	struct tegra_hdmi *hdmi = data;
+	u32 value;
+	int err;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_INT_STATUS);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_INT_STATUS);
+
+	if (value & INT_CODEC_SCRATCH0) {
+		unsigned int format;
+		u32 value;
+
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
+
+		if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) {
+			unsigned int sample_rate, channels;
+
+			format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
+
+			hda_format_parse(format, &sample_rate, &channels);
+
+			hdmi->audio_sample_rate = sample_rate;
+			hdmi->audio_channels = channels;
+
+			err = tegra_hdmi_setup_audio(hdmi);
+			if (err < 0) {
+				tegra_hdmi_disable_audio_infoframe(hdmi);
+				tegra_hdmi_disable_audio(hdmi);
+			} else {
+				tegra_hdmi_setup_audio_infoframe(hdmi);
+				tegra_hdmi_enable_audio_infoframe(hdmi);
+				tegra_hdmi_enable_audio(hdmi);
+			}
+		} else {
+			tegra_hdmi_disable_audio_infoframe(hdmi);
+			tegra_hdmi_disable_audio(hdmi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -1453,8 +1673,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->config = match->data;
 	hdmi->dev = &pdev->dev;
+
 	hdmi->audio_source = AUTO;
-	hdmi->audio_freq = 44100;
+	hdmi->audio_sample_rate = 48000;
+	hdmi->audio_channels = 2;
 	hdmi->stereo = false;
 	hdmi->dvi = false;
 
@@ -1515,6 +1737,17 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->irq = err;
 
+	err = devm_request_irq(hdmi->dev, hdmi->irq, tegra_hdmi_irq, 0,
+			       dev_name(hdmi->dev), hdmi);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n",
+			hdmi->irq, err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, hdmi);
+	pm_runtime_enable(&pdev->dev);
+
 	INIT_LIST_HEAD(&hdmi->client.list);
 	hdmi->client.ops = &hdmi_client_ops;
 	hdmi->client.dev = &pdev->dev;
@@ -1526,8 +1759,6 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	platform_set_drvdata(pdev, hdmi);
-
 	return 0;
 }
 
@@ -1536,6 +1767,8 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&hdmi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -1545,17 +1778,61 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 
 	tegra_output_remove(&hdmi->output);
 
-	clk_disable_unprepare(hdmi->clk_parent);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_hdmi_suspend(struct device *dev)
+{
+	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
 	clk_disable_unprepare(hdmi->clk);
 
 	return 0;
 }
 
+static int tegra_hdmi_resume(struct device *dev)
+{
+	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(hdmi->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
+	err = reset_control_deassert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		clk_disable_unprepare(hdmi->clk);
+		return err;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_hdmi_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_hdmi_suspend, tegra_hdmi_resume, NULL)
+};
+
 struct platform_driver tegra_hdmi_driver = {
 	.driver = {
 		.name = "tegra-hdmi",
-		.owner = THIS_MODULE,
 		.of_match_table = tegra_hdmi_of_match,
+		.pm = &tegra_hdmi_pm_ops,
 	},
 	.probe = tegra_hdmi_probe,
 	.remove = tegra_hdmi_remove,
diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/drm/tegra/hdmi.h
index a882514389cd..2339f134a09a 100644
--- a/drivers/gpu/drm/tegra/hdmi.h
+++ b/drivers/gpu/drm/tegra/hdmi.h
@@ -468,9 +468,20 @@
 #define HDMI_NV_PDISP_KEY_SKEY_INDEX				0xa3
 
 #define HDMI_NV_PDISP_SOR_AUDIO_CNTRL0				0xac
-#define AUDIO_CNTRL0_INJECT_NULLSMPL (1 << 29)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_AUTO	(0 << 20)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_SPDIF	(1 << 20)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_HDAL	(2 << 20)
+#define  SOR_AUDIO_CNTRL0_INJECT_NULLSMPL	(1 << 29)
+#define HDMI_NV_PDISP_SOR_AUDIO_SPARE0				0xae
+#define  SOR_AUDIO_SPARE0_HBR_ENABLE		(1 << 27)
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0		0xba
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID	(1 << 30)
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK	0xffff
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1		0xbb
 #define HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR			0xbc
 #define HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE			0xbd
+#define  SOR_AUDIO_HDA_PRESENSE_VALID		(1 << 1)
+#define  SOR_AUDIO_HDA_PRESENSE_PRESENT		(1 << 0)
 
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320    0xbf
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441    0xc0
@@ -481,6 +492,14 @@
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920    0xc5
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_DEFAULT 0xc5
 
+#define HDMI_NV_PDISP_INT_STATUS			0xcc
+#define  INT_SCRATCH		(1 << 3)
+#define  INT_CP_REQUEST		(1 << 2)
+#define  INT_CODEC_SCRATCH1	(1 << 1)
+#define  INT_CODEC_SCRATCH0	(1 << 0)
+#define HDMI_NV_PDISP_INT_MASK				0xcd
+#define HDMI_NV_PDISP_INT_ENABLE			0xce
+
 #define HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT		0xd1
 #define PEAK_CURRENT_LANE0(x) (((x) & 0x7f) <<  0)
 #define PEAK_CURRENT_LANE1(x) (((x) & 0x7f) <<  8)
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 1480f6aaffe4..595d1ec3e02e 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -36,6 +36,7 @@ int tegra_output_connector_get_modes(struct drm_connector *connector)
 
 	if (edid) {
 		err = drm_add_edid_modes(connector, edid);
+		drm_edid_to_eld(connector, edid);
 		kfree(edid);
 	}
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 34958d71284b..74d0540b8d4c 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -7,11 +7,13 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/debugfs.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
@@ -149,6 +151,8 @@ struct tegra_sor_soc {
 
 	const struct tegra_sor_hdmi_settings *settings;
 	unsigned int num_settings;
+
+	const u8 *xbar_cfg;
 };
 
 struct tegra_sor;
@@ -169,7 +173,9 @@ struct tegra_sor {
 
 	struct reset_control *rst;
 	struct clk *clk_parent;
+	struct clk *clk_brick;
 	struct clk *clk_safe;
+	struct clk *clk_src;
 	struct clk *clk_dp;
 	struct clk *clk;
 
@@ -190,6 +196,18 @@ struct tegra_sor {
 	struct regulator *hdmi_supply;
 };
 
+struct tegra_sor_state {
+	struct drm_connector_state base;
+
+	unsigned int bpc;
+};
+
+static inline struct tegra_sor_state *
+to_sor_state(struct drm_connector_state *state)
+{
+	return container_of(state, struct tegra_sor_state, base);
+}
+
 struct tegra_sor_config {
 	u32 bits_per_pixel;
 
@@ -225,6 +243,118 @@ static inline void tegra_sor_writel(struct tegra_sor *sor, u32 value,
 	writel(value, sor->regs + (offset << 2));
 }
 
+static int tegra_sor_set_parent_clock(struct tegra_sor *sor, struct clk *parent)
+{
+	int err;
+
+	clk_disable_unprepare(sor->clk);
+
+	err = clk_set_parent(sor->clk, parent);
+	if (err < 0)
+		return err;
+
+	err = clk_prepare_enable(sor->clk);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+struct tegra_clk_sor_brick {
+	struct clk_hw hw;
+	struct tegra_sor *sor;
+};
+
+static inline struct tegra_clk_sor_brick *to_brick(struct clk_hw *hw)
+{
+	return container_of(hw, struct tegra_clk_sor_brick, hw);
+}
+
+static const char * const tegra_clk_sor_brick_parents[] = {
+	"pll_d2_out0", "pll_dp"
+};
+
+static int tegra_clk_sor_brick_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct tegra_clk_sor_brick *brick = to_brick(hw);
+	struct tegra_sor *sor = brick->sor;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
+
+	switch (index) {
+	case 0:
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_PCLK;
+		break;
+
+	case 1:
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
+		break;
+	}
+
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	return 0;
+}
+
+static u8 tegra_clk_sor_brick_get_parent(struct clk_hw *hw)
+{
+	struct tegra_clk_sor_brick *brick = to_brick(hw);
+	struct tegra_sor *sor = brick->sor;
+	u8 parent = U8_MAX;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+
+	switch (value & SOR_CLK_CNTRL_DP_CLK_SEL_MASK) {
+	case SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_PCLK:
+	case SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_PCLK:
+		parent = 0;
+		break;
+
+	case SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK:
+	case SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK:
+		parent = 1;
+		break;
+	}
+
+	return parent;
+}
+
+static const struct clk_ops tegra_clk_sor_brick_ops = {
+	.set_parent = tegra_clk_sor_brick_set_parent,
+	.get_parent = tegra_clk_sor_brick_get_parent,
+};
+
+static struct clk *tegra_clk_sor_brick_register(struct tegra_sor *sor,
+						const char *name)
+{
+	struct tegra_clk_sor_brick *brick;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	brick = devm_kzalloc(sor->dev, sizeof(*brick), GFP_KERNEL);
+	if (!brick)
+		return ERR_PTR(-ENOMEM);
+
+	brick->sor = sor;
+
+	init.name = name;
+	init.flags = 0;
+	init.parent_names = tegra_clk_sor_brick_parents;
+	init.num_parents = ARRAY_SIZE(tegra_clk_sor_brick_parents);
+	init.ops = &tegra_clk_sor_brick_ops;
+
+	brick->hw.init = &init;
+
+	clk = devm_clk_register(sor->dev, &brick->hw);
+	if (IS_ERR(clk))
+		kfree(brick);
+
+	return clk;
+}
+
 static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 				   struct drm_dp_link *link)
 {
@@ -569,10 +699,10 @@ static int tegra_sor_compute_params(struct tegra_sor *sor,
 	return false;
 }
 
-static int tegra_sor_calc_config(struct tegra_sor *sor,
-				 const struct drm_display_mode *mode,
-				 struct tegra_sor_config *config,
-				 struct drm_dp_link *link)
+static int tegra_sor_compute_config(struct tegra_sor *sor,
+				    const struct drm_display_mode *mode,
+				    struct tegra_sor_config *config,
+				    struct drm_dp_link *link)
 {
 	const u64 f = 100000, link_rate = link->rate * 1000;
 	const u64 pclk = mode->clock * 1000;
@@ -661,6 +791,135 @@ static int tegra_sor_calc_config(struct tegra_sor *sor,
 	return 0;
 }
 
+static void tegra_sor_apply_config(struct tegra_sor *sor,
+				   const struct tegra_sor_config *config)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value &= ~SOR_DP_LINKCTL_TU_SIZE_MASK;
+	value |= SOR_DP_LINKCTL_TU_SIZE(config->tu_size);
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	value = tegra_sor_readl(sor, SOR_DP_CONFIG0);
+	value &= ~SOR_DP_CONFIG_WATERMARK_MASK;
+	value |= SOR_DP_CONFIG_WATERMARK(config->watermark);
+
+	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_COUNT_MASK;
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(config->active_count);
+
+	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_FRAC_MASK;
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(config->active_frac);
+
+	if (config->active_polarity)
+		value |= SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
+	else
+		value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
+
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_ENABLE;
+	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE;
+	tegra_sor_writel(sor, value, SOR_DP_CONFIG0);
+
+	value = tegra_sor_readl(sor, SOR_DP_AUDIO_HBLANK_SYMBOLS);
+	value &= ~SOR_DP_AUDIO_HBLANK_SYMBOLS_MASK;
+	value |= config->hblank_symbols & 0xffff;
+	tegra_sor_writel(sor, value, SOR_DP_AUDIO_HBLANK_SYMBOLS);
+
+	value = tegra_sor_readl(sor, SOR_DP_AUDIO_VBLANK_SYMBOLS);
+	value &= ~SOR_DP_AUDIO_VBLANK_SYMBOLS_MASK;
+	value |= config->vblank_symbols & 0xffff;
+	tegra_sor_writel(sor, value, SOR_DP_AUDIO_VBLANK_SYMBOLS);
+}
+
+static void tegra_sor_mode_set(struct tegra_sor *sor,
+			       const struct drm_display_mode *mode,
+			       struct tegra_sor_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(sor->output.encoder.crtc);
+	unsigned int vbe, vse, hbe, hse, vbs, hbs;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PIXELDEPTH_MASK;
+	value &= ~SOR_STATE_ASY_CRC_MODE_MASK;
+	value &= ~SOR_STATE_ASY_OWNER_MASK;
+
+	value |= SOR_STATE_ASY_CRC_MODE_COMPLETE |
+		 SOR_STATE_ASY_OWNER(dc->pipe + 1);
+
+	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+		value &= ~SOR_STATE_ASY_HSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		value |= SOR_STATE_ASY_HSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+		value &= ~SOR_STATE_ASY_VSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		value |= SOR_STATE_ASY_VSYNCPOL;
+
+	switch (state->bpc) {
+	case 16:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_48_444;
+		break;
+
+	case 12:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_36_444;
+		break;
+
+	case 10:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_30_444;
+		break;
+
+	case 8:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
+		break;
+
+	case 6:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
+		break;
+
+	default:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
+		break;
+	}
+
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	/*
+	 * TODO: The video timing programming below doesn't seem to match the
+	 * register definitions.
+	 */
+
+	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
+
+	/* sync end = sync width - 1 */
+	vse = mode->vsync_end - mode->vsync_start - 1;
+	hse = mode->hsync_end - mode->hsync_start - 1;
+
+	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
+
+	/* blank end = sync end + back porch */
+	vbe = vse + (mode->vtotal - mode->vsync_end);
+	hbe = hse + (mode->htotal - mode->hsync_end);
+
+	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
+
+	/* blank start = blank end + active */
+	vbs = vbe + mode->vdisplay;
+	hbs = hbe + mode->hdisplay;
+
+	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
+
+	/* XXX interlacing support */
+	tegra_sor_writel(sor, 0x001, SOR_HEAD_STATE5(dc->pipe));
+}
+
 static int tegra_sor_detach(struct tegra_sor *sor)
 {
 	unsigned long value, timeout;
@@ -733,7 +992,8 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 	if ((value & SOR_PWR_TRIGGER) != 0)
 		return -ETIMEDOUT;
 
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
@@ -1038,6 +1298,22 @@ static void tegra_sor_debugfs_exit(struct tegra_sor *sor)
 	sor->debugfs = NULL;
 }
 
+static void tegra_sor_connector_reset(struct drm_connector *connector)
+{
+	struct tegra_sor_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return;
+
+	if (connector->state) {
+		__drm_atomic_helper_connector_destroy_state(connector->state);
+		kfree(connector->state);
+	}
+
+	__drm_atomic_helper_connector_reset(connector, &state->base);
+}
+
 static enum drm_connector_status
 tegra_sor_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -1050,13 +1326,28 @@ tegra_sor_connector_detect(struct drm_connector *connector, bool force)
 	return tegra_output_connector_detect(connector, force);
 }
 
+static struct drm_connector_state *
+tegra_sor_connector_duplicate_state(struct drm_connector *connector)
+{
+	struct tegra_sor_state *state = to_sor_state(connector->state);
+	struct tegra_sor_state *copy;
+
+	copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+	if (!copy)
+		return NULL;
+
+	__drm_atomic_helper_connector_duplicate_state(connector, &copy->base);
+
+	return &copy->base;
+}
+
 static const struct drm_connector_funcs tegra_sor_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.reset = drm_atomic_helper_connector_reset,
+	.reset = tegra_sor_connector_reset,
 	.detect = tegra_sor_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = tegra_output_connector_destroy,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_duplicate_state = tegra_sor_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
@@ -1081,6 +1372,10 @@ static enum drm_mode_status
 tegra_sor_connector_mode_valid(struct drm_connector *connector,
 			       struct drm_display_mode *mode)
 {
+	/* HDMI 2.0 modes are not yet supported */
+	if (mode->clock > 340000)
+		return MODE_NOCLOCK;
+
 	return MODE_OK;
 }
 
@@ -1140,8 +1435,7 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder)
 	if (output->panel)
 		drm_panel_unprepare(output->panel);
 
-	reset_control_assert(sor->rst);
-	clk_disable_unprepare(sor->clk);
+	pm_runtime_put(sor->dev);
 }
 
 #if 0
@@ -1191,19 +1485,18 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	unsigned int vbe, vse, hbe, hse, vbs, hbs, i;
 	struct tegra_sor *sor = to_sor(output);
 	struct tegra_sor_config config;
+	struct tegra_sor_state *state;
 	struct drm_dp_link link;
 	u8 rate, lanes;
+	unsigned int i;
 	int err = 0;
 	u32 value;
 
-	err = clk_prepare_enable(sor->clk);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable clock: %d\n", err);
+	state = to_sor_state(output->connector.state);
 
-	reset_control_deassert(sor->rst);
+	pm_runtime_get_sync(sor->dev);
 
 	if (output->panel)
 		drm_panel_prepare(output->panel);
@@ -1218,17 +1511,17 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
 	memset(&config, 0, sizeof(config));
-	config.bits_per_pixel = output->connector.display_info.bpc * 3;
+	config.bits_per_pixel = state->bpc * 3;
 
-	err = tegra_sor_calc_config(sor, mode, &config, &link);
+	err = tegra_sor_compute_config(sor, mode, &config, &link);
 	if (err < 0)
-		dev_err(sor->dev, "failed to compute link configuration: %d\n",
-			err);
+		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
 
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
@@ -1325,10 +1618,18 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
 	tegra_sor_writel(sor, value, SOR_PLL2);
 
-	/* switch to DP clock */
-	err = clk_set_parent(sor->clk, sor->clk_dp);
+	/* XXX not in TRM */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
+
+	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+
+	/* switch to DP parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_dp);
 	if (err < 0)
-		dev_err(sor->dev, "failed to set DP parent clock: %d\n", err);
+		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
 	/* power DP lanes */
 	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
@@ -1374,13 +1675,11 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value |= drm_dp_link_rate_to_bw_code(link.rate) << 2;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
-	/* set linkctl */
+	tegra_sor_apply_config(sor, &config);
+
+	/* enable link */
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
 	value |= SOR_DP_LINKCTL_ENABLE;
-
-	value &= ~SOR_DP_LINKCTL_TU_SIZE_MASK;
-	value |= SOR_DP_LINKCTL_TU_SIZE(config.tu_size);
-
 	value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
 
@@ -1393,35 +1692,6 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	tegra_sor_writel(sor, value, SOR_DP_TPG);
 
-	value = tegra_sor_readl(sor, SOR_DP_CONFIG0);
-	value &= ~SOR_DP_CONFIG_WATERMARK_MASK;
-	value |= SOR_DP_CONFIG_WATERMARK(config.watermark);
-
-	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_COUNT_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(config.active_count);
-
-	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_FRAC_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(config.active_frac);
-
-	if (config.active_polarity)
-		value |= SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
-	else
-		value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
-
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_ENABLE;
-	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE;
-	tegra_sor_writel(sor, value, SOR_DP_CONFIG0);
-
-	value = tegra_sor_readl(sor, SOR_DP_AUDIO_HBLANK_SYMBOLS);
-	value &= ~SOR_DP_AUDIO_HBLANK_SYMBOLS_MASK;
-	value |= config.hblank_symbols & 0xffff;
-	tegra_sor_writel(sor, value, SOR_DP_AUDIO_HBLANK_SYMBOLS);
-
-	value = tegra_sor_readl(sor, SOR_DP_AUDIO_VBLANK_SYMBOLS);
-	value &= ~SOR_DP_AUDIO_VBLANK_SYMBOLS_MASK;
-	value |= config.vblank_symbols & 0xffff;
-	tegra_sor_writel(sor, value, SOR_DP_AUDIO_VBLANK_SYMBOLS);
-
 	/* enable pad calibration logic */
 	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
 	value |= SOR_DP_PADCTL_PAD_CAL_PD;
@@ -1477,75 +1747,19 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
 
-	/*
-	 * configure panel (24bpp, vsync-, hsync-, DP-A protocol, complete
-	 * raster, associate with display controller)
-	 */
-	value = SOR_STATE_ASY_PROTOCOL_DP_A |
-		SOR_STATE_ASY_CRC_MODE_COMPLETE |
-		SOR_STATE_ASY_OWNER(dc->pipe + 1);
-
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
-		value &= ~SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		value &= ~SOR_STATE_ASY_VSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL;
-
-	switch (config.bits_per_pixel) {
-	case 24:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
-		break;
-
-	case 18:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
-		break;
-
-	default:
-		BUG();
-		break;
-	}
-
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
-	/*
-	 * TODO: The video timing programming below doesn't seem to match the
-	 * register definitions.
-	 */
-
-	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
-
-	vse = mode->vsync_end - mode->vsync_start - 1;
-	hse = mode->hsync_end - mode->hsync_start - 1;
-
-	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
-
-	vbe = vse + (mode->vsync_start - mode->vdisplay);
-	hbe = hse + (mode->hsync_start - mode->hdisplay);
-
-	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
-
-	vbs = vbe + mode->vdisplay;
-	hbs = hbe + mode->hdisplay;
-
-	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
-
-	tegra_sor_writel(sor, 0x1, SOR_HEAD_STATE5(dc->pipe));
-
 	/* CSTM (LVDS, link A/B, upper) */
 	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
 		SOR_CSTM_UPPER;
 	tegra_sor_writel(sor, value, SOR_CSTM);
 
+	/* use DP-A protocol */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	tegra_sor_mode_set(sor, mode, state);
+
 	/* PWM setup */
 	err = tegra_sor_setup_pwm(sor, 250);
 	if (err < 0)
@@ -1577,11 +1791,15 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 			       struct drm_connector_state *conn_state)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_sor_state *state = to_sor_state(conn_state);
 	struct tegra_dc *dc = to_tegra_dc(conn_state->crtc);
 	unsigned long pclk = crtc_state->mode.clock * 1000;
 	struct tegra_sor *sor = to_sor(output);
+	struct drm_display_info *info;
 	int err;
 
+	info = &output->connector.display_info;
+
 	err = tegra_dc_state_setup_clock(dc, crtc_state, sor->clk_parent,
 					 pclk, 0);
 	if (err < 0) {
@@ -1589,6 +1807,18 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 		return err;
 	}
 
+	switch (info->bpc) {
+	case 8:
+	case 6:
+		state->bpc = info->bpc;
+		break;
+
+	default:
+		DRM_DEBUG_KMS("%u bits-per-color not supported\n", info->bpc);
+		state->bpc = 8;
+		break;
+	}
+
 	return 0;
 }
 
@@ -1751,9 +1981,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power off HDMI rail: %d\n", err);
 
-	reset_control_assert(sor->rst);
-	usleep_range(1000, 2000);
-	clk_disable_unprepare(sor->clk);
+	pm_runtime_put(sor->dev);
 }
 
 static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
@@ -1761,26 +1989,21 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	struct tegra_output *output = encoder_to_output(encoder);
 	unsigned int h_ref_to_sync = 1, pulse_start, max_ac;
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	unsigned int vbe, vse, hbe, hse, vbs, hbs, div;
 	struct tegra_sor_hdmi_settings *settings;
 	struct tegra_sor *sor = to_sor(output);
+	struct tegra_sor_state *state;
 	struct drm_display_mode *mode;
-	struct drm_display_info *info;
+	unsigned int div, i;
 	u32 value;
 	int err;
 
+	state = to_sor_state(output->connector.state);
 	mode = &encoder->crtc->state->adjusted_mode;
-	info = &output->connector.display_info;
 
-	err = clk_prepare_enable(sor->clk);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable clock: %d\n", err);
+	pm_runtime_get_sync(sor->dev);
 
-	usleep_range(1000, 2000);
-
-	reset_control_deassert(sor->rst);
-
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
@@ -1876,22 +2099,20 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value = SOR_REFCLK_DIV_INT(div) | SOR_REFCLK_DIV_FRAC(div);
 	tegra_sor_writel(sor, value, SOR_REFCLK);
 
-	/* XXX don't hardcode */
-	value = SOR_XBAR_CTRL_LINK1_XSEL(4, 4) |
-		SOR_XBAR_CTRL_LINK1_XSEL(3, 3) |
-		SOR_XBAR_CTRL_LINK1_XSEL(2, 2) |
-		SOR_XBAR_CTRL_LINK1_XSEL(1, 1) |
-		SOR_XBAR_CTRL_LINK1_XSEL(0, 0) |
-		SOR_XBAR_CTRL_LINK0_XSEL(4, 4) |
-		SOR_XBAR_CTRL_LINK0_XSEL(3, 3) |
-		SOR_XBAR_CTRL_LINK0_XSEL(2, 0) |
-		SOR_XBAR_CTRL_LINK0_XSEL(1, 1) |
-		SOR_XBAR_CTRL_LINK0_XSEL(0, 2);
-	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+	/* XXX not in TRM */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
 
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	err = clk_set_parent(sor->clk, sor->clk_parent);
+	/* switch to parent clock */
+	err = clk_set_parent(sor->clk_src, sor->clk_parent);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set source clock: %d\n", err);
+
+	err = tegra_sor_set_parent_clock(sor, sor->clk_src);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
@@ -2001,7 +2222,7 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value &= ~DITHER_CONTROL_MASK;
 	value &= ~BASE_COLOR_SIZE_MASK;
 
-	switch (info->bpc) {
+	switch (state->bpc) {
 	case 6:
 		value |= BASE_COLOR_SIZE_666;
 		break;
@@ -2011,7 +2232,8 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		break;
 
 	default:
-		WARN(1, "%u bits-per-color not supported\n", info->bpc);
+		WARN(1, "%u bits-per-color not supported\n", state->bpc);
+		value |= BASE_COLOR_SIZE_888;
 		break;
 	}
 
@@ -2021,83 +2243,19 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
 
-	/* configure mode */
-	value = tegra_sor_readl(sor, SOR_STATE1);
-	value &= ~SOR_STATE_ASY_PIXELDEPTH_MASK;
-	value &= ~SOR_STATE_ASY_CRC_MODE_MASK;
-	value &= ~SOR_STATE_ASY_OWNER_MASK;
-
-	value |= SOR_STATE_ASY_CRC_MODE_COMPLETE |
-		 SOR_STATE_ASY_OWNER(dc->pipe + 1);
-
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
-		value &= ~SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		value &= ~SOR_STATE_ASY_VSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL;
-
-	switch (info->bpc) {
-	case 8:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
-		break;
-
-	case 6:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
-		break;
-
-	default:
-		BUG();
-		break;
-	}
-
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
+	/* configure dynamic range of output */
 	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
 	value &= ~SOR_HEAD_STATE_RANGECOMPRESS_MASK;
 	value &= ~SOR_HEAD_STATE_DYNRANGE_MASK;
 	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
 
+	/* configure colorspace */
 	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
 	value &= ~SOR_HEAD_STATE_COLORSPACE_MASK;
 	value |= SOR_HEAD_STATE_COLORSPACE_RGB;
 	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
 
-	/*
-	 * TODO: The video timing programming below doesn't seem to match the
-	 * register definitions.
-	 */
-
-	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
-
-	/* sync end = sync width - 1 */
-	vse = mode->vsync_end - mode->vsync_start - 1;
-	hse = mode->hsync_end - mode->hsync_start - 1;
-
-	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
-
-	/* blank end = sync end + back porch */
-	vbe = vse + (mode->vtotal - mode->vsync_end);
-	hbe = hse + (mode->htotal - mode->hsync_end);
-
-	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
-
-	/* blank start = blank end + active */
-	vbs = vbe + mode->vdisplay;
-	hbs = hbe + mode->hdisplay;
-
-	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
-
-	tegra_sor_writel(sor, 0x1, SOR_HEAD_STATE5(dc->pipe));
+	tegra_sor_mode_set(sor, mode, state);
 
 	tegra_sor_update(sor);
 
@@ -2195,10 +2353,13 @@ static int tegra_sor_init(struct host1x_client *client)
 	 * XXX: Remove this reset once proper hand-over from firmware to
 	 * kernel is possible.
 	 */
-	err = reset_control_assert(sor->rst);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to assert SOR reset: %d\n", err);
-		return err;
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to assert SOR reset: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	err = clk_prepare_enable(sor->clk);
@@ -2209,10 +2370,13 @@ static int tegra_sor_init(struct host1x_client *client)
 
 	usleep_range(1000, 3000);
 
-	err = reset_control_deassert(sor->rst);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to deassert SOR reset: %d\n", err);
-		return err;
+	if (sor->rst) {
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	err = clk_prepare_enable(sor->clk_safe);
@@ -2323,11 +2487,16 @@ static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
 	.remove = tegra_sor_hdmi_remove,
 };
 
+static const u8 tegra124_sor_xbar_cfg[5] = {
+	0, 1, 2, 3, 4
+};
+
 static const struct tegra_sor_soc tegra124_sor = {
 	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
 };
 
 static const struct tegra_sor_soc tegra210_sor = {
@@ -2335,6 +2504,11 @@ static const struct tegra_sor_soc tegra210_sor = {
 	.supports_lvds = false,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+};
+
+static const u8 tegra210_sor_xbar_cfg[5] = {
+	2, 1, 0, 3, 4
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
@@ -2345,6 +2519,8 @@ static const struct tegra_sor_soc tegra210_sor1 = {
 
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
+
+	.xbar_cfg = tegra210_sor_xbar_cfg,
 };
 
 static const struct of_device_id tegra_sor_of_match[] = {
@@ -2434,11 +2610,14 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	sor->rst = devm_reset_control_get(&pdev->dev, "sor");
-	if (IS_ERR(sor->rst)) {
-		err = PTR_ERR(sor->rst);
-		dev_err(&pdev->dev, "failed to get reset control: %d\n", err);
-		goto remove;
+	if (!pdev->dev.pm_domain) {
+		sor->rst = devm_reset_control_get(&pdev->dev, "sor");
+		if (IS_ERR(sor->rst)) {
+			err = PTR_ERR(sor->rst);
+			dev_err(&pdev->dev, "failed to get reset control: %d\n",
+				err);
+			goto remove;
+		}
 	}
 
 	sor->clk = devm_clk_get(&pdev->dev, NULL);
@@ -2448,6 +2627,16 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
+	if (sor->soc->supports_hdmi || sor->soc->supports_dp) {
+		sor->clk_src = devm_clk_get(&pdev->dev, "source");
+		if (IS_ERR(sor->clk_src)) {
+			err = PTR_ERR(sor->clk_src);
+			dev_err(sor->dev, "failed to get source clock: %d\n",
+				err);
+			goto remove;
+		}
+	}
+
 	sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(sor->clk_parent)) {
 		err = PTR_ERR(sor->clk_parent);
@@ -2469,6 +2658,19 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
+	platform_set_drvdata(pdev, sor);
+	pm_runtime_enable(&pdev->dev);
+
+	pm_runtime_get_sync(&pdev->dev);
+	sor->clk_brick = tegra_clk_sor_brick_register(sor, "sor1_brick");
+	pm_runtime_put(&pdev->dev);
+
+	if (IS_ERR(sor->clk_brick)) {
+		err = PTR_ERR(sor->clk_brick);
+		dev_err(&pdev->dev, "failed to register SOR clock: %d\n", err);
+		goto remove;
+	}
+
 	INIT_LIST_HEAD(&sor->client.list);
 	sor->client.ops = &sor_client_ops;
 	sor->client.dev = &pdev->dev;
@@ -2480,8 +2682,6 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	platform_set_drvdata(pdev, sor);
-
 	return 0;
 
 remove:
@@ -2497,6 +2697,8 @@ static int tegra_sor_remove(struct platform_device *pdev)
 	struct tegra_sor *sor = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&sor->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -2515,10 +2717,62 @@ static int tegra_sor_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int tegra_sor_suspend(struct device *dev)
+{
+	struct tegra_sor *sor = dev_get_drvdata(dev);
+	int err;
+
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(sor->clk);
+
+	return 0;
+}
+
+static int tegra_sor_resume(struct device *dev)
+{
+	struct tegra_sor *sor = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(sor->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (sor->rst) {
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			clk_disable_unprepare(sor->clk);
+			return err;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_sor_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL)
+};
+
 struct platform_driver tegra_sor_driver = {
 	.driver = {
 		.name = "tegra-sor",
 		.of_match_table = tegra_sor_of_match,
+		.pm = &tegra_sor_pm_ops,
 	},
 	.probe = tegra_sor_probe,
 	.remove = tegra_sor_remove,
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index 2d31d027e3f6..865c73b48968 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -27,6 +27,9 @@
 #define  SOR_STATE_ASY_PIXELDEPTH_MASK		(0xf << 17)
 #define  SOR_STATE_ASY_PIXELDEPTH_BPP_18_444	(0x2 << 17)
 #define  SOR_STATE_ASY_PIXELDEPTH_BPP_24_444	(0x5 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_30_444	(0x6 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_36_444	(0x8 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_48_444	(0x9 << 17)
 #define  SOR_STATE_ASY_VSYNCPOL			(1 << 13)
 #define  SOR_STATE_ASY_HSYNCPOL			(1 << 12)
 #define  SOR_STATE_ASY_PROTOCOL_MASK		(0xf << 8)
diff --git a/drivers/gpu/drm/tilcdc/Kconfig b/drivers/gpu/drm/tilcdc/Kconfig
index f60a1ec84fa4..28fed7e206d0 100644
--- a/drivers/gpu/drm/tilcdc/Kconfig
+++ b/drivers/gpu/drm/tilcdc/Kconfig
@@ -2,7 +2,6 @@ config DRM_TILCDC
 	tristate "DRM Support for TI LCDC Display Controller"
 	depends on DRM && OF && ARM
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	select VIDEOMODE_HELPERS
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 79027b1c64d3..107c8bd04f6d 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -697,7 +697,7 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 
 		spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
 
-		drm_handle_vblank(dev, 0);
+		drm_crtc_handle_vblank(crtc);
 
 		if (!skip_event) {
 			struct drm_pending_vblank_event *event;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 39386f50af87..4054d804fe06 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -146,10 +146,9 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(bo->mem.mm_node != NULL);
 	BUG_ON(!list_empty(&bo->lru));
 	BUG_ON(!list_empty(&bo->ddestroy));
-
-	if (bo->ttm)
-		ttm_tt_destroy(bo->ttm);
+	ttm_tt_destroy(bo->ttm);
 	atomic_dec(&bo->glob->bo_count);
+	fence_put(bo->moving);
 	if (bo->resv == &bo->ttm_resv)
 		reservation_object_fini(&bo->ttm_resv);
 	mutex_destroy(&bo->wu_mutex);
@@ -360,7 +359,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		ret = bdev->driver->move(bo, evict, interruptible,
 					 no_wait_gpu, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, evict, interruptible,
+					 no_wait_gpu, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -396,8 +396,7 @@ moved:
 
 out_err:
 	new_man = &bdev->man[bo->mem.mem_type];
-	if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
+	if (new_man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(bo->ttm);
 		bo->ttm = NULL;
 	}
@@ -418,11 +417,8 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 	if (bo->bdev->driver->move_notify)
 		bo->bdev->driver->move_notify(bo, NULL);
 
-	if (bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
-		ttm_tt_destroy(bo->ttm);
-		bo->ttm = NULL;
-	}
+	ttm_tt_destroy(bo->ttm);
+	bo->ttm = NULL;
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	ww_mutex_unlock (&bo->resv->lock);
@@ -688,15 +684,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS) {
-			pr_err("Failed to expire sync object before buffer eviction\n");
-		}
-		goto out;
-	}
-
 	lockdep_assert_held(&bo->resv->lock.base);
 
 	evict_mem = bo->mem;
@@ -720,7 +707,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 
 	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
 				     no_wait_gpu);
-	if (ret) {
+	if (unlikely(ret)) {
 		if (ret != -ERESTARTSYS)
 			pr_err("Buffer eviction failed\n");
 		ttm_bo_mem_put(bo, &evict_mem);
@@ -800,6 +787,34 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
 /**
+ * Add the last move fence to the BO and reserve a new shared slot.
+ */
+static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
+				 struct ttm_mem_type_manager *man,
+				 struct ttm_mem_reg *mem)
+{
+	struct fence *fence;
+	int ret;
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		reservation_object_add_shared_fence(bo->resv, fence);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (unlikely(ret))
+			return ret;
+
+		fence_put(bo->moving);
+		bo->moving = fence;
+	}
+
+	return 0;
+}
+
+/**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
  */
@@ -825,10 +840,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	if (mem->mm_node == NULL)
-		return -ENOMEM;
 	mem->mem_type = mem_type;
-	return 0;
+	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
 static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
@@ -898,6 +911,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	bool has_erestartsys = false;
 	int i, ret;
 
+	ret = reservation_object_reserve_shared(bo->resv);
+	if (unlikely(ret))
+		return ret;
+
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
@@ -931,9 +948,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
 			return ret;
-		
-		if (mem->mm_node)
+
+		if (mem->mm_node) {
+			ret = ttm_bo_add_move_fence(bo, man, mem);
+			if (unlikely(ret)) {
+				(*man->func->put_node)(man, mem);
+				return ret;
+			}
 			break;
+		}
 	}
 
 	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
@@ -1000,20 +1023,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 
 	lockdep_assert_held(&bo->resv->lock.base);
 
-	/*
-	 * Don't wait for the BO on initial allocation. This is important when
-	 * the BO has an imported reservation object.
-	 */
-	if (bo->mem.mem_type != TTM_PL_SYSTEM || bo->ttm != NULL) {
-		/*
-		 * FIXME: It's possible to pipeline buffer moves.
-		 * Have the driver move function wait for idle when necessary,
-		 * instead of doing it here.
-		 */
-		ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-		if (ret)
-			return ret;
-	}
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
 	mem.page_alignment = bo->mem.page_alignment;
@@ -1034,9 +1043,9 @@ out_unlock:
 	return ret;
 }
 
-static bool ttm_bo_mem_compat(struct ttm_placement *placement,
-			      struct ttm_mem_reg *mem,
-			      uint32_t *new_flags)
+bool ttm_bo_mem_compat(struct ttm_placement *placement,
+		       struct ttm_mem_reg *mem,
+		       uint32_t *new_flags)
 {
 	int i;
 
@@ -1068,6 +1077,7 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 
 	return false;
 }
+EXPORT_SYMBOL(ttm_bo_mem_compat);
 
 int ttm_bo_validate(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
@@ -1165,7 +1175,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->mem.page_alignment = page_alignment;
 	bo->mem.bus.io_reserved_vm = false;
 	bo->mem.bus.io_reserved_count = 0;
-	bo->priv_flags = 0;
+	bo->moving = NULL;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
 	bo->persistent_swap_storage = persistent_swap_storage;
 	bo->acc_size = acc_size;
@@ -1277,6 +1287,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_bo_global *glob = bdev->glob;
+	struct fence *fence;
 	int ret;
 
 	/*
@@ -1297,6 +1308,23 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 		spin_lock(&glob->lru_lock);
 	}
 	spin_unlock(&glob->lru_lock);
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		ret = fence_wait(fence, false);
+		fence_put(fence);
+		if (ret) {
+			if (allow_errors) {
+				return ret;
+			} else {
+				pr_err("Cleanup eviction failed\n");
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -1316,6 +1344,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		       mem_type);
 		return ret;
 	}
+	fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
@@ -1361,6 +1390,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->io_reserve_fastpath = true;
 	man->use_io_reserve_lru = false;
 	mutex_init(&man->io_reserve_mutex);
+	spin_lock_init(&man->move_lock);
 	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
@@ -1379,6 +1409,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->size = p_size;
 
 	INIT_LIST_HEAD(&man->lru);
+	man->move = NULL;
 
 	return 0;
 }
@@ -1572,47 +1603,17 @@ EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool interruptible, bool no_wait)
 {
-	struct reservation_object_list *fobj;
-	struct reservation_object *resv;
-	struct fence *excl;
-	long timeout = 15 * HZ;
-	int i;
-
-	resv = bo->resv;
-	fobj = reservation_object_get_list(resv);
-	excl = reservation_object_get_excl(resv);
-	if (excl) {
-		if (!fence_is_signaled(excl)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(excl,
-						     interruptible, timeout);
-		}
-	}
-
-	for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
-		struct fence *fence;
-		fence = rcu_dereference_protected(fobj->shared[i],
-						reservation_object_held(resv));
-
-		if (!fence_is_signaled(fence)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(fence,
-						     interruptible, timeout);
-		}
-	}
+	long timeout = no_wait ? 0 : 15 * HZ;
 
+	timeout = reservation_object_wait_timeout_rcu(bo->resv, true,
+						      interruptible, timeout);
 	if (timeout < 0)
 		return timeout;
 
 	if (timeout == 0)
 		return -EBUSY;
 
-	reservation_object_add_excl_fence(resv, NULL);
-	clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+	reservation_object_add_excl_fence(bo->resv, NULL);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_wait);
@@ -1682,14 +1683,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	/**
-	 * Wait for GPU, then move to system cached.
+	 * Move to system cached
 	 */
 
-	ret = ttm_bo_wait(bo, false, false);
-
-	if (unlikely(ret != 0))
-		goto out;
-
 	if ((bo->mem.placement & swap_placement) != swap_placement) {
 		struct ttm_mem_reg evict_mem;
 
@@ -1704,6 +1700,14 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 			goto out;
 	}
 
+	/**
+	 * Make sure BO is idle.
+	 */
+
+	ret = ttm_bo_wait(bo, false, false);
+	if (unlikely(ret != 0))
+		goto out;
+
 	ttm_bo_unmap_virtual(bo);
 
 	/**
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d9831559706e..2df602a35f92 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -321,7 +321,8 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool evict, bool no_wait_gpu,
+		       bool evict, bool interruptible,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -337,6 +338,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	unsigned long add = 0;
 	int dir;
 
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap);
 	if (ret)
 		return ret;
@@ -401,8 +406,7 @@ out2:
 	*old_mem = *new_mem;
 	new_mem->mm_node = NULL;
 
-	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) {
-		ttm_tt_unbind(ttm);
+	if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(ttm);
 		bo->ttm = NULL;
 	}
@@ -462,6 +466,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
 	INIT_LIST_HEAD(&fbo->io_reserve_lru);
+	fbo->moving = NULL;
 	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
 
@@ -634,7 +639,6 @@ EXPORT_SYMBOL(ttm_bo_kunmap);
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 			      struct fence *fence,
 			      bool evict,
-			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -649,9 +653,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
-		    (bo->ttm != NULL)) {
-			ttm_tt_unbind(bo->ttm);
+		if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 			ttm_tt_destroy(bo->ttm);
 			bo->ttm = NULL;
 		}
@@ -665,7 +667,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 * operation has completed.
 		 */
 
-		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
@@ -694,3 +697,95 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
+
+int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
+			 struct fence *fence, bool evict,
+			 struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	struct ttm_mem_type_manager *from = &bdev->man[old_mem->mem_type];
+	struct ttm_mem_type_manager *to = &bdev->man[new_mem->mem_type];
+
+	int ret;
+
+	reservation_object_add_excl_fence(bo->resv, fence);
+
+	if (!evict) {
+		struct ttm_buffer_object *ghost_obj;
+
+		/**
+		 * This should help pipeline ordinary buffer moves.
+		 *
+		 * Hang old buffer memory on a new buffer object,
+		 * and leave it to be released when the GPU
+		 * operation has completed.
+		 */
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
+		if (ret)
+			return ret;
+
+		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
+		/**
+		 * If we're not moving to fixed memory, the TTM object
+		 * needs to stay alive. Otherwhise hang it on the ghost
+		 * bo to be unbound and destroyed.
+		 */
+
+		if (!(to->flags & TTM_MEMTYPE_FLAG_FIXED))
+			ghost_obj->ttm = NULL;
+		else
+			bo->ttm = NULL;
+
+		ttm_bo_unreserve(ghost_obj);
+		ttm_bo_unref(&ghost_obj);
+
+	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
+
+		/**
+		 * BO doesn't have a TTM we need to bind/unbind. Just remember
+		 * this eviction and free up the allocation
+		 */
+
+		spin_lock(&from->move_lock);
+		if (!from->move || fence_is_later(fence, from->move)) {
+			fence_put(from->move);
+			from->move = fence_get(fence);
+		}
+		spin_unlock(&from->move_lock);
+
+		ttm_bo_free_old_node(bo);
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+	} else {
+		/**
+		 * Last resort, wait for the move to be completed.
+		 *
+		 * Should never happen in pratice.
+		 */
+
+		ret = ttm_bo_wait(bo, false, false);
+		if (ret)
+			return ret;
+
+		if (to->flags & TTM_MEMTYPE_FLAG_FIXED) {
+			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
+		ttm_bo_free_old_node(bo);
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_pipeline_move);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3216878bced3..a6ed9d5e5167 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -48,15 +48,14 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 
-	if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
+	if (likely(!bo->moving))
 		goto out_unlock;
 
 	/*
 	 * Quick non-stalling check for idle.
 	 */
-	ret = ttm_bo_wait(bo, false, true);
-	if (likely(ret == 0))
-		goto out_unlock;
+	if (fence_is_signaled(bo->moving))
+		goto out_clear;
 
 	/*
 	 * If possible, avoid waiting for GPU with mmap_sem
@@ -68,17 +67,23 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 			goto out_unlock;
 
 		up_read(&vma->vm_mm->mmap_sem);
-		(void) ttm_bo_wait(bo, true, false);
+		(void) fence_wait(bo->moving, true);
 		goto out_unlock;
 	}
 
 	/*
 	 * Ordinary wait.
 	 */
-	ret = ttm_bo_wait(bo, true, false);
-	if (unlikely(ret != 0))
+	ret = fence_wait(bo->moving, true);
+	if (unlikely(ret != 0)) {
 		ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
 			VM_FAULT_NOPAGE;
+		goto out_unlock;
+	}
+
+out_clear:
+	fence_put(bo->moving);
+	bo->moving = NULL;
 
 out_unlock:
 	return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 077ae9b2865d..526e5a78eea5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -166,12 +166,10 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
 
 void ttm_tt_destroy(struct ttm_tt *ttm)
 {
-	if (unlikely(ttm == NULL))
+	if (ttm == NULL)
 		return;
 
-	if (ttm->state == tt_bound) {
-		ttm_tt_unbind(ttm);
-	}
+	ttm_tt_unbind(ttm);
 
 	if (ttm->state == tt_unbound)
 		ttm_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig
index 613ab0622d6e..1616ec4f4d84 100644
--- a/drivers/gpu/drm/udl/Kconfig
+++ b/drivers/gpu/drm/udl/Kconfig
@@ -4,12 +4,7 @@ config DRM_UDL
 	depends on USB_SUPPORT
 	depends on USB_ARCH_HAS_HCD
 	select USB
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_DEFERRED_IO
 	select DRM_KMS_HELPER
-        select DRM_KMS_FB_HELPER
 	help
 	  This is a KMS driver for the USB displaylink video adapters.
           Say M/Y to add support for these devices via drm/kms interfaces.
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index c20408940cd0..17d34e0edbdd 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -94,7 +94,6 @@ static void udl_usb_disconnect(struct usb_interface *interface)
 	struct drm_device *dev = usb_get_intfdata(interface);
 
 	drm_kms_helper_poll_disable(dev);
-	drm_connector_unregister_all(dev);
 	udl_fbdev_unplug(dev);
 	udl_drop_usb(dev);
 	drm_unplug_dev(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 4c0f26a644a3..8fc2b731b59a 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -46,12 +46,17 @@ struct vc4_crtc {
 	const struct vc4_crtc_data *data;
 	void __iomem *regs;
 
+	/* Timestamp at start of vblank irq - unaffected by lock delays. */
+	ktime_t t_vblank;
+
 	/* Which HVS channel we're using for our CRTC. */
 	int channel;
 
 	u8 lut_r[256];
 	u8 lut_g[256];
 	u8 lut_b[256];
+	/* Size in pixels of the COB memory allocated to this CRTC. */
+	u32 cob_size;
 
 	struct drm_pending_vblank_event *event;
 };
@@ -146,6 +151,144 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused)
 }
 #endif
 
+int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+			    unsigned int flags, int *vpos, int *hpos,
+			    ktime_t *stime, ktime_t *etime,
+			    const struct drm_display_mode *mode)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
+	u32 val;
+	int fifo_lines;
+	int vblank_lines;
+	int ret = 0;
+
+	/*
+	 * XXX Doesn't work well in interlaced mode yet, partially due
+	 * to problems in vc4 kms or drm core interlaced mode handling,
+	 * so disable for now in interlaced mode.
+	 */
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		return ret;
+
+	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+
+	/* Get optional system timestamp before query. */
+	if (stime)
+		*stime = ktime_get();
+
+	/*
+	 * Read vertical scanline which is currently composed for our
+	 * pixelvalve by the HVS, and also the scaler status.
+	 */
+	val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
+
+	/* Get optional system timestamp after query. */
+	if (etime)
+		*etime = ktime_get();
+
+	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+	/* Vertical position of hvs composed scanline. */
+	*vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
+
+	/* No hpos info available. */
+	if (hpos)
+		*hpos = 0;
+
+	/* This is the offset we need for translating hvs -> pv scanout pos. */
+	fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
+
+	if (fifo_lines > 0)
+		ret |= DRM_SCANOUTPOS_VALID;
+
+	/* HVS more than fifo_lines into frame for compositing? */
+	if (*vpos > fifo_lines) {
+		/*
+		 * We are in active scanout and can get some meaningful results
+		 * from HVS. The actual PV scanout can not trail behind more
+		 * than fifo_lines as that is the fifo's capacity. Assume that
+		 * in active scanout the HVS and PV work in lockstep wrt. HVS
+		 * refilling the fifo and PV consuming from the fifo, ie.
+		 * whenever the PV consumes and frees up a scanline in the
+		 * fifo, the HVS will immediately refill it, therefore
+		 * incrementing vpos. Therefore we choose HVS read position -
+		 * fifo size in scanlines as a estimate of the real scanout
+		 * position of the PV.
+		 */
+		*vpos -= fifo_lines + 1;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			*vpos /= 2;
+
+		ret |= DRM_SCANOUTPOS_ACCURATE;
+		return ret;
+	}
+
+	/*
+	 * Less: This happens when we are in vblank and the HVS, after getting
+	 * the VSTART restart signal from the PV, just started refilling its
+	 * fifo with new lines from the top-most lines of the new framebuffers.
+	 * The PV does not scan out in vblank, so does not remove lines from
+	 * the fifo, so the fifo will be full quickly and the HVS has to pause.
+	 * We can't get meaningful readings wrt. scanline position of the PV
+	 * and need to make things up in a approximative but consistent way.
+	 */
+	ret |= DRM_SCANOUTPOS_IN_VBLANK;
+	vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
+
+	if (flags & DRM_CALLED_FROM_VBLIRQ) {
+		/*
+		 * Assume the irq handler got called close to first
+		 * line of vblank, so PV has about a full vblank
+		 * scanlines to go, and as a base timestamp use the
+		 * one taken at entry into vblank irq handler, so it
+		 * is not affected by random delays due to lock
+		 * contention on event_lock or vblank_time lock in
+		 * the core.
+		 */
+		*vpos = -vblank_lines;
+
+		if (stime)
+			*stime = vc4_crtc->t_vblank;
+		if (etime)
+			*etime = vc4_crtc->t_vblank;
+
+		/*
+		 * If the HVS fifo is not yet full then we know for certain
+		 * we are at the very beginning of vblank, as the hvs just
+		 * started refilling, and the stime and etime timestamps
+		 * truly correspond to start of vblank.
+		 */
+		if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
+			ret |= DRM_SCANOUTPOS_ACCURATE;
+	} else {
+		/*
+		 * No clue where we are inside vblank. Return a vpos of zero,
+		 * which will cause calling code to just return the etime
+		 * timestamp uncorrected. At least this is no worse than the
+		 * standard fallback.
+		 */
+		*vpos = 0;
+	}
+
+	return ret;
+}
+
+int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
+				  int *max_error, struct timeval *vblank_time,
+				  unsigned flags)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
+	struct drm_crtc *crtc = &vc4_crtc->base;
+	struct drm_crtc_state *state = crtc->state;
+
+	/* Helper routine in DRM core does all the work: */
+	return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
+						     vblank_time, flags,
+						     &state->adjusted_mode);
+}
+
 static void vc4_crtc_destroy(struct drm_crtc *crtc)
 {
 	drm_crtc_cleanup(crtc);
@@ -449,14 +592,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
 
-	HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-		  vc4_state->mm.start);
-
-	if (debug_dump_regs) {
-		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
-		vc4_hvs_dump_state(dev);
-	}
-
 	if (crtc->state->event) {
 		unsigned long flags;
 
@@ -466,8 +601,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 		spin_lock_irqsave(&dev->event_lock, flags);
 		vc4_crtc->event = crtc->state->event;
-		spin_unlock_irqrestore(&dev->event_lock, flags);
 		crtc->state->event = NULL;
+
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	} else {
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+	}
+
+	if (debug_dump_regs) {
+		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
+		vc4_hvs_dump_state(dev);
 	}
 }
 
@@ -493,12 +640,17 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 {
 	struct drm_crtc *crtc = &vc4_crtc->base;
 	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	u32 chan = vc4_crtc->channel;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (vc4_crtc->event) {
+	if (vc4_crtc->event &&
+	    (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) {
 		drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
 		vc4_crtc->event = NULL;
+		drm_crtc_vblank_put(crtc);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
@@ -510,6 +662,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
 	irqreturn_t ret = IRQ_NONE;
 
 	if (stat & PV_INT_VFP_START) {
+		vc4_crtc->t_vblank = ktime_get();
 		CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
 		drm_crtc_handle_vblank(&vc4_crtc->base);
 		vc4_crtc_handle_page_flip(vc4_crtc);
@@ -549,6 +702,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
 
+	drm_crtc_vblank_put(crtc);
 	drm_framebuffer_unreference(flip_state->fb);
 	kfree(flip_state);
 
@@ -591,6 +745,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 		return ret;
 	}
 
+	WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
 	/* Immediately update the plane's legacy fb pointer, so that later
 	 * modeset prep sees the state that will be present when the semaphore
 	 * is released.
@@ -711,6 +867,22 @@ static void vc4_set_crtc_possible_masks(struct drm_device *drm,
 	}
 }
 
+static void
+vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
+{
+	struct drm_device *drm = vc4_crtc->base.dev;
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
+	/* Top/base are supposed to be 4-pixel aligned, but the
+	 * Raspberry Pi firmware fills the low bits (which are
+	 * presumably ignored).
+	 */
+	u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
+	u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
+
+	vc4_crtc->cob_size = top - base + 4;
+}
+
 static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -787,6 +959,8 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 		crtc->cursor = cursor_plane;
 	}
 
+	vc4_crtc_get_cob_allocation(vc4_crtc);
+
 	CRTC_WRITE(PV_INTEN, 0);
 	CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
 	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index dba1114297e4..275fedbdbd9e 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -227,14 +227,12 @@ static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev,
 {
 	struct drm_connector *connector = NULL;
 	struct vc4_dpi_connector *dpi_connector;
-	int ret = 0;
 
 	dpi_connector = devm_kzalloc(dev->dev, sizeof(*dpi_connector),
 				     GFP_KERNEL);
-	if (!dpi_connector) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!dpi_connector)
+		return ERR_PTR(-ENOMEM);
+
 	connector = &dpi_connector->base;
 
 	dpi_connector->encoder = dpi->encoder;
@@ -251,12 +249,6 @@ static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev,
 	drm_mode_connector_attach_encoder(connector, dpi->encoder);
 
 	return connector;
-
- fail:
-	if (connector)
-		vc4_dpi_connector_destroy(connector);
-
-	return ERR_PTR(ret);
 }
 
 static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = {
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 9e88231b8906..8b42d31a7f0e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include "drm_fb_cma_helper.h"
 
 #include "uapi/drm/vc4_drm.h"
@@ -43,12 +44,54 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
 	return map;
 }
 
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_get_param *args = data;
+	int ret;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	switch (args->param) {
+	case DRM_VC4_PARAM_V3D_IDENT0:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT0);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_V3D_IDENT1:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT1);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_V3D_IDENT2:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT2);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
+		args->value = true;
+		break;
+	default:
+		DRM_DEBUG("Unknown parameter %d\n", args->param);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void vc4_lastclose(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	if (vc4->fbdev)
-		drm_fbdev_cma_restore_mode(vc4->fbdev);
+	drm_fbdev_cma_restore_mode(vc4->fbdev);
 }
 
 static const struct file_operations vc4_drm_fops = {
@@ -66,14 +109,15 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
-	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
 			  DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -91,7 +135,9 @@ static struct drm_driver vc4_drm_driver = {
 
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
-	.get_vblank_counter = drm_vblank_count,
+	.get_vblank_counter = drm_vblank_no_hw_counter,
+	.get_scanout_position = vc4_crtc_get_scanoutpos,
+	.get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
 
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = vc4_debugfs_init,
@@ -195,8 +241,6 @@ static int vc4_drm_bind(struct device *dev)
 	vc4_bo_cache_init(drm);
 
 	drm_mode_config_init(drm);
-	if (ret)
-		goto unref;
 
 	vc4_gem_init(drm);
 
@@ -218,7 +262,6 @@ unbind_all:
 	component_unbind_all(dev, drm);
 gem_destroy:
 	vc4_gem_destroy(drm);
-unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
 	return ret;
@@ -246,8 +289,8 @@ static const struct component_master_ops vc4_drm_ops = {
 static struct platform_driver *const component_drivers[] = {
 	&vc4_hdmi_driver,
 	&vc4_dpi_driver,
-	&vc4_crtc_driver,
 	&vc4_hvs_driver,
+	&vc4_crtc_driver,
 	&vc4_v3d_driver,
 };
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index c799baabc008..489e3de0c050 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -355,6 +355,9 @@ struct vc4_validated_shader_info {
 	uint32_t uniforms_src_size;
 	uint32_t num_texture_samples;
 	struct vc4_texture_sample_info *texture_samples;
+
+	uint32_t num_uniform_addr_offsets;
+	uint32_t *uniform_addr_offsets;
 };
 
 /**
@@ -415,6 +418,13 @@ extern struct platform_driver vc4_crtc_driver;
 int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id);
 void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
+int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+			    unsigned int flags, int *vpos, int *hpos,
+			    ktime_t *stime, ktime_t *etime,
+			    const struct drm_display_mode *mode);
+int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
+				  int *max_error, struct timeval *vblank_time,
+				  unsigned flags);
 
 /* vc4_debugfs.c */
 int vc4_debugfs_init(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 68df91c3f860..4452f3631cac 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -456,12 +456,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	if (IS_ERR(hdmi->hd_regs))
 		return PTR_ERR(hdmi->hd_regs);
 
-	ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
-	if (!ddc_node) {
-		DRM_ERROR("Failed to find ddc node in device tree\n");
-		return -ENODEV;
-	}
-
 	hdmi->pixel_clock = devm_clk_get(dev, "pixel");
 	if (IS_ERR(hdmi->pixel_clock)) {
 		DRM_ERROR("Failed to get pixel clock\n");
@@ -473,7 +467,14 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(hdmi->hsm_clock);
 	}
 
+	ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
+	if (!ddc_node) {
+		DRM_ERROR("Failed to find ddc node in device tree\n");
+		return -ENODEV;
+	}
+
 	hdmi->ddc = of_find_i2c_adapter_by_node(ddc_node);
+	of_node_put(ddc_node);
 	if (!hdmi->ddc) {
 		DRM_DEBUG("Failed to get ddc i2c adapter by node\n");
 		return -EPROBE_DEFER;
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 8f4d5ffc32be..4ac894d993cd 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -26,8 +26,7 @@ static void vc4_output_poll_changed(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	if (vc4->fbdev)
-		drm_fbdev_cma_hotplug_event(vc4->fbdev);
+	drm_fbdev_cma_hotplug_event(vc4->fbdev);
 }
 
 struct vc4_commit {
@@ -119,10 +118,18 @@ static int vc4_atomic_commit(struct drm_device *dev,
 		return -ENOMEM;
 
 	/* Make sure that any outstanding modesets have finished. */
-	ret = down_interruptible(&vc4->async_modeset);
-	if (ret) {
-		kfree(c);
-		return ret;
+	if (nonblock) {
+		ret = down_trylock(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return -EBUSY;
+		}
+	} else {
+		ret = down_interruptible(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return ret;
+		}
 	}
 
 	ret = drm_atomic_helper_prepare_planes(dev, state);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 5d2c3d9fd17a..29e4b400e25e 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -94,6 +94,14 @@ static const struct hvs_format {
 		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
 	},
 	{
+		.drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+		.pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true,
+	},
+	{
+		.drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+		.pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false,
+	},
+	{
 		.drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
 		.pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
 	},
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
index d5c2f3c85ebb..f4e795a0d3f6 100644
--- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -70,7 +70,7 @@ enum qpu_raddr {
 	QPU_R_ELEM_QPU = 38,
 	QPU_R_NOP,
 	QPU_R_XY_PIXEL_COORD = 41,
-	QPU_R_MS_REV_FLAGS = 41,
+	QPU_R_MS_REV_FLAGS = 42,
 	QPU_R_VPM = 48,
 	QPU_R_VPM_LD_BUSY,
 	QPU_R_VPM_LD_WAIT,
@@ -230,6 +230,15 @@ enum qpu_unpack_r4 {
 #define QPU_COND_MUL_SHIFT              46
 #define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
 
+#define QPU_BRANCH_COND_SHIFT           52
+#define QPU_BRANCH_COND_MASK            QPU_MASK(55, 52)
+
+#define QPU_BRANCH_REL                  ((uint64_t)1 << 51)
+#define QPU_BRANCH_REG                  ((uint64_t)1 << 50)
+
+#define QPU_BRANCH_RADDR_A_SHIFT        45
+#define QPU_BRANCH_RADDR_A_MASK         QPU_MASK(49, 45)
+
 #define QPU_SF                          ((uint64_t)1 << 45)
 
 #define QPU_WADDR_ADD_SHIFT             38
@@ -261,4 +270,10 @@ enum qpu_unpack_r4 {
 #define QPU_OP_ADD_SHIFT                24
 #define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
 
+#define QPU_LOAD_IMM_SHIFT              0
+#define QPU_LOAD_IMM_MASK               QPU_MASK(31, 0)
+
+#define QPU_BRANCH_TARGET_SHIFT         0
+#define QPU_BRANCH_TARGET_MASK          QPU_MASK(31, 0)
+
 #endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 6163b95c5411..160942a9180e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -341,6 +341,10 @@
 #define SCALER_DISPLACT0                        0x00000030
 #define SCALER_DISPLACT1                        0x00000034
 #define SCALER_DISPLACT2                        0x00000038
+#define SCALER_DISPLACTX(x)			(SCALER_DISPLACT0 +	\
+						 (x) * (SCALER_DISPLACT1 - \
+							SCALER_DISPLACT0))
+
 #define SCALER_DISPCTRL0                        0x00000040
 # define SCALER_DISPCTRLX_ENABLE		BIT(31)
 # define SCALER_DISPCTRLX_RESET			BIT(30)
@@ -362,7 +366,6 @@
 # define SCALER_DISPBKGND_FILL			BIT(24)
 
 #define SCALER_DISPSTAT0                        0x00000048
-#define SCALER_DISPBASE0                        0x0000004c
 # define SCALER_DISPSTATX_MODE_MASK		VC4_MASK(31, 30)
 # define SCALER_DISPSTATX_MODE_SHIFT		30
 # define SCALER_DISPSTATX_MODE_DISABLED		0
@@ -371,6 +374,24 @@
 # define SCALER_DISPSTATX_MODE_EOF		3
 # define SCALER_DISPSTATX_FULL			BIT(29)
 # define SCALER_DISPSTATX_EMPTY			BIT(28)
+# define SCALER_DISPSTATX_FRAME_COUNT_MASK	VC4_MASK(17, 12)
+# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT	12
+# define SCALER_DISPSTATX_LINE_MASK		VC4_MASK(11, 0)
+# define SCALER_DISPSTATX_LINE_SHIFT		0
+
+#define SCALER_DISPBASE0                        0x0000004c
+/* Last pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel.  Must be 4-pixel aligned (and thus 4 pixels less than the
+ * next COB base).
+ */
+# define SCALER_DISPBASEX_TOP_MASK		VC4_MASK(31, 16)
+# define SCALER_DISPBASEX_TOP_SHIFT		16
+/* First pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel.  Must be 4-pixel aligned.
+ */
+# define SCALER_DISPBASEX_BASE_MASK		VC4_MASK(15, 0)
+# define SCALER_DISPBASEX_BASE_SHIFT		0
+
 #define SCALER_DISPCTRL1                        0x00000050
 #define SCALER_DISPBKGND1                       0x00000054
 #define SCALER_DISPBKGNDX(x)			(SCALER_DISPBKGND0 +        \
@@ -381,6 +402,9 @@
 						 (x) * (SCALER_DISPSTAT1 - \
 							SCALER_DISPSTAT0))
 #define SCALER_DISPBASE1                        0x0000005c
+#define SCALER_DISPBASEX(x)			(SCALER_DISPBASE0 +        \
+						 (x) * (SCALER_DISPBASE1 - \
+							SCALER_DISPBASE0))
 #define SCALER_DISPCTRL2                        0x00000060
 #define SCALER_DISPCTRLX(x)			(SCALER_DISPCTRL0 +        \
 						 (x) * (SCALER_DISPCTRL1 - \
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 24c2c746e8f3..9ce1d0adf882 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 		uint32_t *texture_handles_u;
 		void *uniform_data_u;
-		uint32_t tex;
+		uint32_t tex, uni;
 
 		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 
@@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev,
 			}
 		}
 
+		/* Fill in the uniform slots that need this shader's
+		 * start-of-uniforms address (used for resetting the uniform
+		 * stream in the presence of control flow).
+		 */
+		for (uni = 0;
+		     uni < validated_shader->num_uniform_addr_offsets;
+		     uni++) {
+			uint32_t o = validated_shader->uniform_addr_offsets[uni];
+			((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
+		}
+
 		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 
 		exec->uniforms_u += validated_shader->uniforms_src_size;
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index f67124b4c534..46527e989ce3 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -39,7 +39,17 @@
 #include "vc4_drv.h"
 #include "vc4_qpu_defines.h"
 
+#define LIVE_REG_COUNT (32 + 32 + 4)
+
 struct vc4_shader_validation_state {
+	/* Current IP being validated. */
+	uint32_t ip;
+
+	/* IP at the end of the BO, do not read shader[max_ip] */
+	uint32_t max_ip;
+
+	uint64_t *shader;
+
 	struct vc4_texture_sample_info tmu_setup[2];
 	int tmu_write_count[2];
 
@@ -49,8 +59,30 @@ struct vc4_shader_validation_state {
 	 *
 	 * This is used for the validation of direct address memory reads.
 	 */
-	uint32_t live_min_clamp_offsets[32 + 32 + 4];
-	bool live_max_clamp_regs[32 + 32 + 4];
+	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+	bool live_max_clamp_regs[LIVE_REG_COUNT];
+	uint32_t live_immediates[LIVE_REG_COUNT];
+
+	/* Bitfield of which IPs are used as branch targets.
+	 *
+	 * Used for validation that the uniform stream is updated at the right
+	 * points and clearing the texturing/clamping state.
+	 */
+	unsigned long *branch_targets;
+
+	/* Set when entering a basic block, and cleared when the uniform
+	 * address update is found.  This is used to make sure that we don't
+	 * read uniforms when the address is undefined.
+	 */
+	bool needs_uniform_address_update;
+
+	/* Set when we find a backwards branch.  If the branch is backwards,
+	 * the taraget is probably doing an address reset to read uniforms,
+	 * and so we need to be sure that a uniforms address is present in the
+	 * stream, even if the shader didn't need to read uniforms in later
+	 * basic blocks.
+	 */
+	bool needs_uniform_address_for_loop;
 };
 
 static uint32_t
@@ -129,11 +161,11 @@ record_texture_sample(struct vc4_validated_shader_info *validated_shader,
 }
 
 static bool
-check_tmu_write(uint64_t inst,
-		struct vc4_validated_shader_info *validated_shader,
+check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
 		bool is_mul)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
@@ -162,7 +194,7 @@ check_tmu_write(uint64_t inst,
 			return false;
 		}
 
-		/* We assert that the the clamped address is the first
+		/* We assert that the clamped address is the first
 		 * argument, and the UBO base address is the second argument.
 		 * This is arbitrary, but simpler than supporting flipping the
 		 * two either way.
@@ -212,8 +244,14 @@ check_tmu_write(uint64_t inst,
 	/* Since direct uses a RADDR uniform reference, it will get counted in
 	 * check_instruction_reads()
 	 */
-	if (!is_direct)
+	if (!is_direct) {
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Texturing with undefined uniform address\n");
+			return false;
+		}
+
 		validated_shader->uniforms_size += 4;
+	}
 
 	if (submit) {
 		if (!record_texture_sample(validated_shader,
@@ -227,23 +265,138 @@ check_tmu_write(uint64_t inst,
 	return true;
 }
 
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t o = validated_shader->num_uniform_addr_offsets;
+	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+	validated_shader->uniform_addr_offsets =
+		krealloc(validated_shader->uniform_addr_offsets,
+			 (o + 1) *
+			 sizeof(*validated_shader->uniform_addr_offsets),
+			 GFP_KERNEL);
+	if (!validated_shader->uniform_addr_offsets)
+		return false;
+
+	validated_shader->uniform_addr_offsets[o] = num_uniforms;
+	validated_shader->num_uniform_addr_offsets++;
+
+	return true;
+}
+
 static bool
-check_reg_write(uint64_t inst,
-		struct vc4_validated_shader_info *validated_shader,
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+			       struct vc4_shader_validation_state *validation_state,
+			       bool is_mul)
+{
+	uint64_t inst = validation_state->shader[validation_state->ip];
+	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+	/* We want our reset to be pointing at whatever uniform follows the
+	 * uniforms base address.
+	 */
+	u32 expected_offset = validated_shader->uniforms_size + 4;
+
+	/* We only support absolute uniform address changes, and we
+	 * require that they be in the current basic block before any
+	 * of its uniform reads.
+	 *
+	 * One could potentially emit more efficient QPU code, by
+	 * noticing that (say) an if statement does uniform control
+	 * flow for all threads and that the if reads the same number
+	 * of uniforms on each side.  However, this scheme is easy to
+	 * validate so it's all we allow for now.
+	 */
+
+	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+		DRM_ERROR("uniforms address change must be "
+			  "normal math\n");
+		return false;
+	}
+
+	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+	    !(inst & QPU_PM)) {
+		DRM_ERROR("No packing allowed on uniforms reset\n");
+		return false;
+	}
+
+	if (add_lri == -1) {
+		DRM_ERROR("First argument of uniform address write must be "
+			  "an immediate value.\n");
+		return false;
+	}
+
+	if (validation_state->live_immediates[add_lri] != expected_offset) {
+		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+			  validation_state->live_immediates[add_lri],
+			  expected_offset);
+		return false;
+	}
+
+	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+		DRM_ERROR("Second argument of uniform address write must be "
+			  "a uniform.\n");
+		return false;
+	}
+
+	validation_state->needs_uniform_address_update = false;
+	validation_state->needs_uniform_address_for_loop = false;
+	return require_uniform_address_uniform(validated_shader);
+}
+
+static bool
+check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
 		bool is_mul)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	bool is_b = is_mul ^ ws;
+	u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+	if (lri != -1) {
+		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+		if (sig == QPU_SIG_LOAD_IMM &&
+		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+			validation_state->live_immediates[lri] =
+				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+		} else {
+			validation_state->live_immediates[lri] = ~0;
+		}
+	}
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
-		/* XXX: We'll probably need to support this for reladdr, but
-		 * it's definitely a security-related one.
-		 */
-		DRM_ERROR("uniforms address load unsupported\n");
-		return false;
+		if (is_b) {
+			DRM_ERROR("relative uniforms address change "
+				  "unsupported\n");
+			return false;
+		}
+
+		return validate_uniform_address_write(validated_shader,
+						      validation_state,
+						      is_mul);
 
 	case QPU_W_TLB_COLOR_MS:
 	case QPU_W_TLB_COLOR_ALL:
@@ -261,7 +414,7 @@ check_reg_write(uint64_t inst,
 	case QPU_W_TMU1_T:
 	case QPU_W_TMU1_R:
 	case QPU_W_TMU1_B:
-		return check_tmu_write(inst, validated_shader, validation_state,
+		return check_tmu_write(validated_shader, validation_state,
 				       is_mul);
 
 	case QPU_W_HOST_INT:
@@ -294,10 +447,10 @@ check_reg_write(uint64_t inst,
 }
 
 static void
-track_live_clamps(uint64_t inst,
-		  struct vc4_validated_shader_info *validated_shader,
+track_live_clamps(struct vc4_validated_shader_info *validated_shader,
 		  struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
@@ -369,10 +522,10 @@ track_live_clamps(uint64_t inst,
 }
 
 static bool
-check_instruction_writes(uint64_t inst,
-			 struct vc4_validated_shader_info *validated_shader,
+check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 			 struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
 	bool ok;
@@ -382,20 +535,44 @@ check_instruction_writes(uint64_t inst,
 		return false;
 	}
 
-	ok = (check_reg_write(inst, validated_shader, validation_state,
-			      false) &&
-	      check_reg_write(inst, validated_shader, validation_state,
-			      true));
+	ok = (check_reg_write(validated_shader, validation_state, false) &&
+	      check_reg_write(validated_shader, validation_state, true));
 
-	track_live_clamps(inst, validated_shader, validation_state);
+	track_live_clamps(validated_shader, validation_state);
 
 	return ok;
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-			struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+	     struct vc4_validated_shader_info *validated_shader,
+	     struct vc4_shader_validation_state *validation_state,
+	     int ip)
+{
+	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+	if ((int)branch_imm < 0)
+		validation_state->needs_uniform_address_for_loop = true;
+
+	/* We don't want to have to worry about validation of this, and
+	 * there's no need for it.
+	 */
+	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+		DRM_ERROR("branch instruction at %d wrote a register.\n",
+			  validation_state->ip);
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+			struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -407,40 +584,204 @@ check_instruction_reads(uint64_t inst,
 		 * already be OOM.
 		 */
 		validated_shader->uniforms_size += 4;
+
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Uniform read with undefined uniform "
+				  "address\n");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Make sure that all branches are absolute and point within the shader, and
+ * note their targets for later.
+ */
+static bool
+vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t max_branch_target = 0;
+	bool found_shader_end = false;
+	int ip;
+	int shader_end_ip = 0;
+	int last_branch = -2;
+
+	for (ip = 0; ip < validation_state->max_ip; ip++) {
+		uint64_t inst = validation_state->shader[ip];
+		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+		uint32_t after_delay_ip = ip + 4;
+		uint32_t branch_target_ip;
+
+		if (sig == QPU_SIG_PROG_END) {
+			shader_end_ip = ip;
+			found_shader_end = true;
+			continue;
+		}
+
+		if (sig != QPU_SIG_BRANCH)
+			continue;
+
+		if (ip - last_branch < 4) {
+			DRM_ERROR("Branch at %d during delay slots\n", ip);
+			return false;
+		}
+		last_branch = ip;
+
+		if (inst & QPU_BRANCH_REG) {
+			DRM_ERROR("branching from register relative "
+				  "not supported\n");
+			return false;
+		}
+
+		if (!(inst & QPU_BRANCH_REL)) {
+			DRM_ERROR("relative branching required\n");
+			return false;
+		}
+
+		/* The actual branch target is the instruction after the delay
+		 * slots, plus whatever byte offset is in the low 32 bits of
+		 * the instruction.  Make sure we're not branching beyond the
+		 * end of the shader object.
+		 */
+		if (branch_imm % sizeof(inst) != 0) {
+			DRM_ERROR("branch target not aligned\n");
+			return false;
+		}
+
+		branch_target_ip = after_delay_ip + (branch_imm >> 3);
+		if (branch_target_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
+				  ip, branch_target_ip,
+				  validation_state->max_ip);
+			return false;
+		}
+		set_bit(branch_target_ip, validation_state->branch_targets);
+
+		/* Make sure that the non-branching path is also not outside
+		 * the shader.
+		 */
+		if (after_delay_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d continues past shader end "
+				  "(%d/%d)\n",
+				  ip, after_delay_ip, validation_state->max_ip);
+			return false;
+		}
+		set_bit(after_delay_ip, validation_state->branch_targets);
+		max_branch_target = max(max_branch_target, after_delay_ip);
+
+		/* There are two delay slots after program end is signaled
+		 * that are still executed, then we're finished.
+		 */
+		if (found_shader_end && ip == shader_end_ip + 2)
+			break;
+	}
+
+	if (max_branch_target > shader_end_ip) {
+		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
+		return false;
 	}
 
 	return true;
 }
 
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+	for (i = 0; i < LIVE_REG_COUNT; i++) {
+		validation_state->live_min_clamp_offsets[i] = ~0;
+		validation_state->live_max_clamp_regs[i] = false;
+		validation_state->live_immediates[i] = ~0;
+	}
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+	return (validation_state->tmu_write_count[0] != 0 ||
+		validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t ip = validation_state->ip;
+
+	if (!test_bit(ip, validation_state->branch_targets))
+		return true;
+
+	if (texturing_in_progress(validation_state)) {
+		DRM_ERROR("Branch target landed during TMU setup\n");
+		return false;
+	}
+
+	/* Reset our live values tracking, since this instruction may have
+	 * multiple predecessors.
+	 *
+	 * One could potentially do analysis to determine that, for
+	 * example, all predecessors have a live max clamp in the same
+	 * register, but we don't bother with that.
+	 */
+	reset_validation_state(validation_state);
+
+	/* Since we've entered a basic block from potentially multiple
+	 * predecessors, we need the uniforms address to be updated before any
+	 * unforms are read.  We require that after any branch point, the next
+	 * uniform to be loaded is a uniform address offset.  That uniform's
+	 * offset will be marked by the uniform address register write
+	 * validation, or a one-off the end-of-program check.
+	 */
+	validation_state->needs_uniform_address_update = true;
+
+	return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
 	bool found_shader_end = false;
 	int shader_end_ip = 0;
-	uint32_t ip, max_ip;
-	uint64_t *shader;
-	struct vc4_validated_shader_info *validated_shader;
+	uint32_t ip;
+	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
-	int i;
 
 	memset(&validation_state, 0, sizeof(validation_state));
+	validation_state.shader = shader_obj->vaddr;
+	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-	for (i = 0; i < 8; i++)
-		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-		validation_state.live_min_clamp_offsets[i] = ~0;
+	reset_validation_state(&validation_state);
 
-	shader = shader_obj->vaddr;
-	max_ip = shader_obj->base.size / sizeof(uint64_t);
+	validation_state.branch_targets =
+		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
+			sizeof(unsigned long), GFP_KERNEL);
+	if (!validation_state.branch_targets)
+		goto fail;
 
 	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
 	if (!validated_shader)
-		return NULL;
+		goto fail;
+
+	if (!vc4_validate_branches(&validation_state))
+		goto fail;
 
-	for (ip = 0; ip < max_ip; ip++) {
-		uint64_t inst = shader[ip];
+	for (ip = 0; ip < validation_state.max_ip; ip++) {
+		uint64_t inst = validation_state.shader[ip];
 		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
+		validation_state.ip = ip;
+
+		if (!vc4_handle_branch_target(&validation_state))
+			goto fail;
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -450,13 +791,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LOAD_TMU1:
 		case QPU_SIG_PROG_END:
 		case QPU_SIG_SMALL_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad write at ip %d\n", ip);
 				goto fail;
 			}
 
-			if (!check_instruction_reads(inst, validated_shader))
+			if (!check_instruction_reads(validated_shader,
+						     &validation_state))
 				goto fail;
 
 			if (sig == QPU_SIG_PROG_END) {
@@ -467,13 +809,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			break;
 
 		case QPU_SIG_LOAD_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
 				goto fail;
 			}
 			break;
 
+		case QPU_SIG_BRANCH:
+			if (!check_branch(inst, validated_shader,
+					  &validation_state, ip))
+				goto fail;
+			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
@@ -487,13 +834,28 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			break;
 	}
 
-	if (ip == max_ip) {
+	if (ip == validation_state.max_ip) {
 		DRM_ERROR("shader failed to terminate before "
 			  "shader BO end at %zd\n",
 			  shader_obj->base.size);
 		goto fail;
 	}
 
+	/* If we did a backwards branch and we haven't emitted a uniforms
+	 * reset since then, we still need the uniforms stream to have the
+	 * uniforms address available so that the backwards branch can do its
+	 * uniforms reset.
+	 *
+	 * We could potentially prove that the backwards branch doesn't
+	 * contain any uses of uniforms until program exit, but that doesn't
+	 * seem to be worth the trouble.
+	 */
+	if (validation_state.needs_uniform_address_for_loop) {
+		if (!require_uniform_address_uniform(validated_shader))
+			goto fail;
+		validated_shader->uniforms_size += 4;
+	}
+
 	/* Again, no chance of integer overflow here because the worst case
 	 * scenario is 8 bytes of uniforms plus handles per 8-byte
 	 * instruction.
@@ -502,9 +864,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		(validated_shader->uniforms_size +
 		 4 * validated_shader->num_texture_samples);
 
+	kfree(validation_state.branch_targets);
+
 	return validated_shader;
 
 fail:
+	kfree(validation_state.branch_targets);
 	if (validated_shader) {
 		kfree(validated_shader->texture_samples);
 		kfree(validated_shader);
diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o
 
 obj-$(CONFIG_DRM_VGEM)	+= vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 35ea5d02a827..c15bafb06665 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -42,81 +42,38 @@
 #define DRIVER_MAJOR	1
 #define DRIVER_MINOR	0
 
-void vgem_gem_put_pages(struct drm_vgem_gem_object *obj)
-{
-	drm_gem_put_pages(&obj->base, obj->pages, false, false);
-	obj->pages = NULL;
-}
-
 static void vgem_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
 
-	drm_gem_free_mmap_offset(obj);
-
-	if (vgem_obj->use_dma_buf && obj->dma_buf) {
-		dma_buf_put(obj->dma_buf);
-		obj->dma_buf = NULL;
-	}
-
 	drm_gem_object_release(obj);
-
-	if (vgem_obj->pages)
-		vgem_gem_put_pages(vgem_obj);
-
-	vgem_obj->pages = NULL;
-
 	kfree(vgem_obj);
 }
 
-int vgem_gem_get_pages(struct drm_vgem_gem_object *obj)
-{
-	struct page **pages;
-
-	if (obj->pages || obj->use_dma_buf)
-		return 0;
-
-	pages = drm_gem_get_pages(&obj->base);
-	if (IS_ERR(pages)) {
-		return PTR_ERR(pages);
-	}
-
-	obj->pages = pages;
-
-	return 0;
-}
-
 static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_vgem_gem_object *obj = vma->vm_private_data;
-	loff_t num_pages;
-	pgoff_t page_offset;
-	int ret;
-
 	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
-
-	num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-	if (page_offset > num_pages)
-		return VM_FAULT_SIGBUS;
-
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-			     obj->pages[page_offset]);
-	switch (ret) {
-	case 0:
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -EBUSY:
-		return VM_FAULT_RETRY;
-	case -EFAULT:
-	case -EINVAL:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ON(1);
-		return VM_FAULT_SIGBUS;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	struct page *page;
+
+	page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
+				       (vaddr - vma->vm_start) >> PAGE_SHIFT);
+	if (!IS_ERR(page)) {
+		vmf->page = page;
+		return 0;
+	} else switch (PTR_ERR(page)) {
+		case -ENOSPC:
+		case -ENOMEM:
+			return VM_FAULT_OOM;
+		case -EBUSY:
+			return VM_FAULT_RETRY;
+		case -EFAULT:
+		case -EINVAL:
+			return VM_FAULT_SIGBUS;
+		default:
+			WARN_ON_ONCE(PTR_ERR(page));
+			return VM_FAULT_SIGBUS;
 	}
 }
 
@@ -126,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
 	.close = drm_gem_vm_close,
 };
 
+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct vgem_file *vfile;
+	int ret;
+
+	vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+	if (!vfile)
+		return -ENOMEM;
+
+	file->driver_priv = vfile;
+
+	ret = vgem_fence_open(vfile);
+	if (ret) {
+		kfree(vfile);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct vgem_file *vfile = file->driver_priv;
+
+	vgem_fence_close(vfile);
+	kfree(vfile);
+}
+
 /* ioctls */
 
 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -134,57 +119,43 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
 					      unsigned long size)
 {
 	struct drm_vgem_gem_object *obj;
-	struct drm_gem_object *gem_object;
-	int err;
-
-	size = roundup(size, PAGE_SIZE);
+	int ret;
 
 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 
-	gem_object = &obj->base;
-
-	err = drm_gem_object_init(dev, gem_object, size);
-	if (err)
-		goto out;
-
-	err = vgem_gem_get_pages(obj);
-	if (err)
-		goto out;
-
-	err = drm_gem_handle_create(file, gem_object, handle);
-	if (err)
-		goto handle_out;
+	ret = drm_gem_object_init(dev, &obj->base, roundup(size, PAGE_SIZE));
+	if (ret)
+		goto err_free;
 
-	drm_gem_object_unreference_unlocked(gem_object);
+	ret = drm_gem_handle_create(file, &obj->base, handle);
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		goto err;
 
-	return gem_object;
+	return &obj->base;
 
-handle_out:
-	drm_gem_object_release(gem_object);
-out:
+err_free:
 	kfree(obj);
-	return ERR_PTR(err);
+err:
+	return ERR_PTR(ret);
 }
 
 static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 				struct drm_mode_create_dumb *args)
 {
 	struct drm_gem_object *gem_object;
-	uint64_t size;
-	uint64_t pitch = args->width * DIV_ROUND_UP(args->bpp, 8);
+	u64 pitch, size;
 
+	pitch = args->width * DIV_ROUND_UP(args->bpp, 8);
 	size = args->height * pitch;
 	if (size == 0)
 		return -EINVAL;
 
 	gem_object = vgem_gem_create(dev, file, &args->handle, size);
-
-	if (IS_ERR(gem_object)) {
-		DRM_DEBUG_DRIVER("object creation failed\n");
+	if (IS_ERR(gem_object))
 		return PTR_ERR(gem_object);
-	}
 
 	args->size = gem_object->size;
 	args->pitch = pitch;
@@ -194,26 +165,26 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 	return 0;
 }
 
-int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
-		      uint32_t handle, uint64_t *offset)
+static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
+			     uint32_t handle, uint64_t *offset)
 {
-	int ret = 0;
 	struct drm_gem_object *obj;
+	int ret;
 
 	obj = drm_gem_object_lookup(file, handle);
 	if (!obj)
 		return -ENOENT;
 
+	if (!obj->filp) {
+		ret = -EINVAL;
+		goto unref;
+	}
+
 	ret = drm_gem_create_mmap_offset(obj);
 	if (ret)
 		goto unref;
 
-	BUG_ON(!obj->filp);
-
-	obj->filp->private_data = obj;
-
 	*offset = drm_vma_node_offset_addr(&obj->vma_node);
-
 unref:
 	drm_gem_object_unreference_unlocked(obj);
 
@@ -221,26 +192,134 @@ unref:
 }
 
 static struct drm_ioctl_desc vgem_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
+static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	unsigned long flags = vma->vm_flags;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	/* Keep the WC mmaping set by drm_gem_mmap() but our pages
+	 * are ordinary and not special.
+	 */
+	vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP;
+	return 0;
+}
+
 static const struct file_operations vgem_driver_fops = {
 	.owner		= THIS_MODULE,
 	.open		= drm_open,
-	.mmap		= drm_gem_mmap,
+	.mmap		= vgem_mmap,
 	.poll		= drm_poll,
 	.read		= drm_read,
 	.unlocked_ioctl = drm_ioctl,
 	.release	= drm_release,
 };
 
+static int vgem_prime_pin(struct drm_gem_object *obj)
+{
+	long n_pages = obj->size >> PAGE_SHIFT;
+	struct page **pages;
+
+	/* Flush the object from the CPU cache so that importers can rely
+	 * on coherent indirect access via the exported dma-address.
+	 */
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	drm_clflush_pages(pages, n_pages);
+	drm_gem_put_pages(obj, pages, true, false);
+
+	return 0;
+}
+
+static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	struct sg_table *st;
+	struct page **pages;
+
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return ERR_CAST(pages);
+
+	st = drm_prime_pages_to_sg(pages, obj->size >> PAGE_SHIFT);
+	drm_gem_put_pages(obj, pages, false, false);
+
+	return st;
+}
+
+static void *vgem_prime_vmap(struct drm_gem_object *obj)
+{
+	long n_pages = obj->size >> PAGE_SHIFT;
+	struct page **pages;
+	void *addr;
+
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return NULL;
+
+	addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL));
+	drm_gem_put_pages(obj, pages, false, false);
+
+	return addr;
+}
+
+static void vgem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	vunmap(vaddr);
+}
+
+static int vgem_prime_mmap(struct drm_gem_object *obj,
+			   struct vm_area_struct *vma)
+{
+	int ret;
+
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (!obj->filp)
+		return -ENODEV;
+
+	ret = obj->filp->f_op->mmap(obj->filp, vma);
+	if (ret)
+		return ret;
+
+	fput(vma->vm_file);
+	vma->vm_file = get_file(obj->filp);
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+	return 0;
+}
+
 static struct drm_driver vgem_driver = {
-	.driver_features		= DRIVER_GEM,
+	.driver_features		= DRIVER_GEM | DRIVER_PRIME,
+	.open				= vgem_open,
+	.preclose			= vgem_preclose,
 	.gem_free_object_unlocked	= vgem_gem_free_object,
 	.gem_vm_ops			= &vgem_gem_vm_ops,
 	.ioctls				= vgem_ioctls,
+	.num_ioctls 			= ARRAY_SIZE(vgem_ioctls),
 	.fops				= &vgem_driver_fops,
+
 	.dumb_create			= vgem_gem_dumb_create,
 	.dumb_map_offset		= vgem_gem_dumb_map,
+
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.gem_prime_pin = vgem_prime_pin,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_get_sg_table = vgem_prime_get_sg_table,
+	.gem_prime_vmap = vgem_prime_vmap,
+	.gem_prime_vunmap = vgem_prime_vunmap,
+	.gem_prime_mmap = vgem_prime_mmap,
+
 	.name	= DRIVER_NAME,
 	.desc	= DRIVER_DESC,
 	.date	= DRIVER_DATE,
@@ -248,7 +327,7 @@ static struct drm_driver vgem_driver = {
 	.minor	= DRIVER_MINOR,
 };
 
-struct drm_device *vgem_device;
+static struct drm_device *vgem_device;
 
 static int __init vgem_init(void)
 {
@@ -261,7 +340,6 @@ static int __init vgem_init(void)
 	}
 
 	ret  = drm_dev_register(vgem_device, 0);
-
 	if (ret)
 		goto out_unref;
 
@@ -283,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);
 
 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index e9f92f7ee275..1f8798ad329c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,15 +32,25 @@
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 
+#include <uapi/drm/vgem_drm.h>
+
+struct vgem_file {
+	struct idr fence_idr;
+	struct mutex fence_mutex;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
 	struct drm_gem_object base;
-	struct page **pages;
-	bool use_dma_buf;
 };
 
-/* vgem_drv.c */
-extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj);
-extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj);
+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
 
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index 000000000000..5c57c1ffa1f9
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software")
+ * to deal in the software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * them Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/reservation.h>
+
+#include "vgem_drv.h"
+
+#define VGEM_FENCE_TIMEOUT (10*HZ)
+
+struct vgem_fence {
+	struct fence base;
+	struct spinlock lock;
+	struct timer_list timer;
+};
+
+static const char *vgem_fence_get_driver_name(struct fence *fence)
+{
+	return "vgem";
+}
+
+static const char *vgem_fence_get_timeline_name(struct fence *fence)
+{
+	return "unbound";
+}
+
+static bool vgem_fence_signaled(struct fence *fence)
+{
+	return false;
+}
+
+static bool vgem_fence_enable_signaling(struct fence *fence)
+{
+	return true;
+}
+
+static void vgem_fence_release(struct fence *base)
+{
+	struct vgem_fence *fence = container_of(base, typeof(*fence), base);
+
+	del_timer_sync(&fence->timer);
+	fence_free(&fence->base);
+}
+
+static void vgem_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static void vgem_fence_timeline_value_str(struct fence *fence, char *str,
+					  int size)
+{
+	snprintf(str, size, "%u", fence_is_signaled(fence) ? fence->seqno : 0);
+}
+
+static const struct fence_ops vgem_fence_ops = {
+	.get_driver_name = vgem_fence_get_driver_name,
+	.get_timeline_name = vgem_fence_get_timeline_name,
+	.enable_signaling = vgem_fence_enable_signaling,
+	.signaled = vgem_fence_signaled,
+	.wait = fence_default_wait,
+	.release = vgem_fence_release,
+
+	.fence_value_str = vgem_fence_value_str,
+	.timeline_value_str = vgem_fence_timeline_value_str,
+};
+
+static void vgem_fence_timeout(unsigned long data)
+{
+	struct vgem_fence *fence = (struct vgem_fence *)data;
+
+	fence_signal(&fence->base);
+}
+
+static struct fence *vgem_fence_create(struct vgem_file *vfile,
+				       unsigned int flags)
+{
+	struct vgem_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	spin_lock_init(&fence->lock);
+	fence_init(&fence->base, &vgem_fence_ops, &fence->lock,
+		   fence_context_alloc(1), 1);
+
+	setup_timer(&fence->timer, vgem_fence_timeout, (unsigned long)fence);
+
+	/* We force the fence to expire within 10s to prevent driver hangs */
+	mod_timer(&fence->timer, jiffies + VGEM_FENCE_TIMEOUT);
+
+	return &fence->base;
+}
+
+static int attach_dmabuf(struct drm_device *dev,
+			 struct drm_gem_object *obj)
+{
+	struct dma_buf *dmabuf;
+
+	if (obj->dma_buf)
+		return 0;
+
+	dmabuf = dev->driver->gem_prime_export(dev, obj, 0);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	obj->dma_buf = dmabuf;
+	drm_gem_object_reference(obj);
+	return 0;
+}
+
+/*
+ * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH):
+ *
+ * Create and attach a fence to the vGEM handle. This fence is then exposed
+ * via the dma-buf reservation object and visible to consumers of the exported
+ * dma-buf. If the flags contain VGEM_FENCE_WRITE, the fence indicates the
+ * vGEM buffer is being written to by the client and is exposed as an exclusive
+ * fence, otherwise the fence indicates the client is current reading from the
+ * buffer and all future writes should wait for the client to signal its
+ * completion. Note that if a conflicting fence is already on the dma-buf (i.e.
+ * an exclusive fence when adding a read, or any fence when adding a write),
+ * -EBUSY is reported. Serialisation between operations should be handled
+ * by waiting upon the dma-buf.
+ *
+ * This returns the handle for the new fence that must be signaled within 10
+ * seconds (or otherwise it will automatically expire). See
+ * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL).
+ *
+ * If the vGEM handle does not exist, vgem_fence_attach_ioctl returns -ENOENT.
+ */
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file)
+{
+	struct drm_vgem_fence_attach *arg = data;
+	struct vgem_file *vfile = file->driver_priv;
+	struct reservation_object *resv;
+	struct drm_gem_object *obj;
+	struct fence *fence;
+	int ret;
+
+	if (arg->flags & ~VGEM_FENCE_WRITE)
+		return -EINVAL;
+
+	if (arg->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(file, arg->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = attach_dmabuf(dev, obj);
+	if (ret)
+		goto err;
+
+	fence = vgem_fence_create(vfile, arg->flags);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Check for a conflicting fence */
+	resv = obj->dma_buf->resv;
+	if (!reservation_object_test_signaled_rcu(resv,
+						  arg->flags & VGEM_FENCE_WRITE)) {
+		ret = -EBUSY;
+		goto err_fence;
+	}
+
+	/* Expose the fence via the dma-buf */
+	ret = 0;
+	mutex_lock(&resv->lock.base);
+	if (arg->flags & VGEM_FENCE_WRITE)
+		reservation_object_add_excl_fence(resv, fence);
+	else if ((ret = reservation_object_reserve_shared(resv)) == 0)
+		reservation_object_add_shared_fence(resv, fence);
+	mutex_unlock(&resv->lock.base);
+
+	/* Record the fence in our idr for later signaling */
+	if (ret == 0) {
+		mutex_lock(&vfile->fence_mutex);
+		ret = idr_alloc(&vfile->fence_idr, fence, 1, 0, GFP_KERNEL);
+		mutex_unlock(&vfile->fence_mutex);
+		if (ret > 0) {
+			arg->out_fence = ret;
+			ret = 0;
+		}
+	}
+err_fence:
+	if (ret) {
+		fence_signal(fence);
+		fence_put(fence);
+	}
+err:
+	drm_gem_object_unreference_unlocked(obj);
+	return ret;
+}
+
+/*
+ * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL):
+ *
+ * Signal and consume a fence ealier attached to a vGEM handle using
+ * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH).
+ *
+ * All fences must be signaled within 10s of attachment or otherwise they
+ * will automatically expire (and a vgem_fence_signal_ioctl returns -ETIMEDOUT).
+ *
+ * Signaling a fence indicates to all consumers of the dma-buf that the
+ * client has completed the operation associated with the fence, and that the
+ * buffer is then ready for consumption.
+ *
+ * If the fence does not exist (or has already been signaled by the client),
+ * vgem_fence_signal_ioctl returns -ENOENT.
+ */
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file)
+{
+	struct vgem_file *vfile = file->driver_priv;
+	struct drm_vgem_fence_signal *arg = data;
+	struct fence *fence;
+	int ret = 0;
+
+	if (arg->flags)
+		return -EINVAL;
+
+	mutex_lock(&vfile->fence_mutex);
+	fence = idr_replace(&vfile->fence_idr, NULL, arg->fence);
+	mutex_unlock(&vfile->fence_mutex);
+	if (!fence)
+		return -ENOENT;
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	if (fence_is_signaled(fence))
+		ret = -ETIMEDOUT;
+
+	fence_signal(fence);
+	fence_put(fence);
+	return ret;
+}
+
+int vgem_fence_open(struct vgem_file *vfile)
+{
+	mutex_init(&vfile->fence_mutex);
+	idr_init(&vfile->fence_idr);
+
+	return 0;
+}
+
+static int __vgem_fence_idr_fini(int id, void *p, void *data)
+{
+	fence_signal(p);
+	fence_put(p);
+	return 0;
+}
+
+void vgem_fence_close(struct vgem_file *vfile)
+{
+	idr_for_each(&vfile->fence_idr, __vgem_fence_idr_fini, vfile);
+	idr_destroy(&vfile->fence_idr);
+}
diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig
index 9983eadb81b6..e1afc3d3f8d9 100644
--- a/drivers/gpu/drm/virtio/Kconfig
+++ b/drivers/gpu/drm/virtio/Kconfig
@@ -1,11 +1,7 @@
 config DRM_VIRTIO_GPU
 	tristate "Virtio GPU driver"
 	depends on DRM && VIRTIO
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
         select DRM_KMS_HELPER
-        select DRM_KMS_FB_HELPER
         select DRM_TTM
 	help
 	   This is the virtual GPU driver for virtio.  It can be used with
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index ac758cdbc1bc..4e192aa2d021 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -53,8 +53,7 @@ static void virtio_gpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 	struct virtio_gpu_framebuffer *virtio_gpu_fb
 		= to_virtio_gpu_framebuffer(fb);
 
-	if (virtio_gpu_fb->obj)
-		drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj);
+	drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(virtio_gpu_fb);
 }
@@ -326,8 +325,7 @@ virtio_gpu_user_framebuffer_create(struct drm_device *dev,
 	ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj);
 	if (ret) {
 		kfree(virtio_gpu_fb);
-		if (obj)
-			drm_gem_object_unreference_unlocked(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return NULL;
 	}
 
@@ -348,7 +346,7 @@ static void vgdev_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_cleanup_planes(dev, state);
 }
 
-struct drm_mode_config_helper_funcs virtio_mode_config_helpers = {
+static struct drm_mode_config_helper_funcs virtio_mode_config_helpers = {
 	.atomic_commit_tail = vgdev_atomic_commit_tail,
 };
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index a0580815629f..80482ac5f95d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -375,6 +375,12 @@ static int virtio_gpu_bo_move(struct ttm_buffer_object *bo,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	virtio_gpu_move_null(bo, new_mem);
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index 9b078a493996..0cd889015dc5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -49,6 +49,7 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -60,7 +61,12 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, placement, interruptible, false);
+
 	if (!ret)
 		vmw_bo_pin_reserved(buf, true);
 
@@ -91,6 +97,7 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -102,6 +109,12 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
+	if (buf->pin_count > 0) {
+		ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+		goto out_unreserve;
+	}
+
 	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, interruptible,
 			      false);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
@@ -161,6 +174,7 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	struct ttm_placement placement;
 	struct ttm_place place;
 	int ret = 0;
+	uint32_t new_flags;
 
 	place = vmw_vram_placement.placement[0];
 	place.lpfn = bo->num_pages;
@@ -185,10 +199,15 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	 */
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
 	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
+	    bo->mem.start > 0 &&
+	    buf->pin_count == 0)
 		(void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
 
-	ret = ttm_bo_validate(bo, &placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(&placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, &placement, interruptible, false);
 
 	/* For some reason we didn't end up at the start of vram */
 	WARN_ON(ret == 0 && bo->offset != 0);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 60646644bef3..e8ae3dc476d1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -233,6 +233,7 @@ static int vmw_force_iommu;
 static int vmw_restrict_iommu;
 static int vmw_force_coherent;
 static int vmw_restrict_dma_mask;
+static int vmw_assume_16bpp;
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
 static void vmw_master_init(struct vmw_master *);
@@ -249,6 +250,8 @@ MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
 module_param_named(force_coherent, vmw_force_coherent, int, 0600);
 MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
 module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
+MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
+module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
 
 static void vmw_print_capabilities(uint32_t capabilities)
@@ -660,6 +663,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
 	dev_priv->mmio_start = pci_resource_start(dev->pdev, 2);
 
+	dev_priv->assume_16bpp = !!vmw_assume_16bpp;
+
 	dev_priv->enable_fb = enable_fbdev;
 
 	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
@@ -706,6 +711,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv,
 				 SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB);
 
+		/*
+		 * Workaround for low memory 2D VMs to compensate for the
+		 * allocation taken by fbdev
+		 */
+		if (!(dev_priv->capabilities & SVGA_CAP_3D))
+			mem_size *= 2;
+
 		dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
 		dev_priv->prim_bb_mem =
 			vmw_read(dev_priv,
@@ -1041,8 +1053,7 @@ static struct vmw_master *vmw_master_check(struct drm_device *dev,
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 	struct vmw_master *vmaster;
 
-	if (file_priv->minor->type != DRM_MINOR_LEGACY ||
-	    !(flags & DRM_AUTH))
+	if (!drm_is_primary_client(file_priv) || !(flags & DRM_AUTH))
 		return NULL;
 
 	ret = mutex_lock_interruptible(&dev->master_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9a90f824814e..74304b03f9d4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -387,6 +387,7 @@ struct vmw_private {
 	spinlock_t hw_lock;
 	spinlock_t cap_lock;
 	bool has_dx;
+	bool assume_16bpp;
 
 	/*
 	 * VGA registers.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 1a1a87cbf109..dc5beff2b4aa 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3625,9 +3625,7 @@ static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
 				   (sw_context->cmd_bounce_size >> 1));
 	}
 
-	if (sw_context->cmd_bounce != NULL)
-		vfree(sw_context->cmd_bounce);
-
+	vfree(sw_context->cmd_bounce);
 	sw_context->cmd_bounce = vmalloc(sw_context->cmd_bounce_size);
 
 	if (sw_context->cmd_bounce == NULL) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 679a4cb98ee3..d2d93959b119 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -517,28 +517,6 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info)
 
 	par->set_fb = &vfb->base;
 
-	if (!par->bo_ptr) {
-		/*
-		 * Pin before mapping. Since we don't know in what placement
-		 * to pin, call into KMS to do it for us.
-		 */
-		ret = vfb->pin(vfb);
-		if (ret) {
-			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
-				  par->vmw_bo->base.num_pages, &par->map);
-		if (ret) {
-			vfb->unpin(vfb);
-			DRM_ERROR("Could not map the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
-	}
-
 	return 0;
 }
 
@@ -601,6 +579,31 @@ static int vmw_fb_set_par(struct fb_info *info)
 	if (ret)
 		goto out_unlock;
 
+	if (!par->bo_ptr) {
+		struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb);
+
+		/*
+		 * Pin before mapping. Since we don't know in what placement
+		 * to pin, call into KMS to do it for us.
+		 */
+		ret = vfb->pin(vfb);
+		if (ret) {
+			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
+				  par->vmw_bo->base.num_pages, &par->map);
+		if (ret) {
+			vfb->unpin(vfb);
+			DRM_ERROR("Could not map the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
+	}
+
+
 	vmw_fb_dirty_mark(par, par->fb_x, par->fb_y,
 			  par->set_fb->width, par->set_fb->height);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 8a69d4da40b5..bf28ccc150df 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1555,14 +1555,10 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector,
 		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
 	};
 	int i;
-	u32 assumed_bpp = 2;
+	u32 assumed_bpp = 4;
 
-	/*
-	 * If using screen objects, then assume 32-bpp because that's what the
-	 * SVGA device is assuming
-	 */
-	if (dev_priv->active_display_unit == vmw_du_screen_object)
-		assumed_bpp = 4;
+	if (dev_priv->assume_16bpp)
+		assumed_bpp = 2;
 
 	if (dev_priv->active_display_unit == vmw_du_screen_target) {
 		max_width  = min(max_width,  dev_priv->stdu_max_width);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 6de283c8fa3e..e57a0bad7a62 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/frame.h>
 #include <asm/hypervisor.h>
 #include "drmP.h"
 #include "vmwgfx_msg.h"
@@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg)
 
 	return -EINVAL;
 }
-
+STACK_FRAME_NON_STANDARD(vmw_send_msg);
 
 
 /**
@@ -299,11 +300,15 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
 		break;
 	}
 
+	if (retries == RETRIES)
+		return -EINVAL;
+
 	*msg_len = reply_len;
 	*msg     = reply;
 
 	return 0;
 }
+STACK_FRAME_NON_STANDARD(vmw_recv_msg);
 
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 9ca818fb034c..41932a7c4f79 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -399,8 +399,10 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 
 	WARN_ON_ONCE(!stdu->defined);
 
-	if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
-	    new_fb->height == mode->vdisplay)
+	new_vfbs = (vfb->dmabuf) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
+
+	if (new_vfbs && new_vfbs->surface->base_size.width == mode->hdisplay &&
+	    new_vfbs->surface->base_size.height == mode->vdisplay)
 		new_content_type = SAME_AS_DISPLAY;
 	else if (vfb->dmabuf)
 		new_content_type = SEPARATE_DMA;
@@ -444,7 +446,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			content_srf.mip_levels[0]     = 1;
 			content_srf.multisample_count = 0;
 		} else {
-			new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 			content_srf = *new_vfbs->surface;
 		}
 
@@ -464,7 +465,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			return ret;
 		}
 	} else if (new_content_type == SAME_AS_DISPLAY) {
-		new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 		new_display_srf = vmw_surface_reference(new_vfbs->surface);
 	}
 
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index a18db4d5347c..c5d82a8a2ec9 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -96,12 +96,12 @@ fail:
  */
 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 {
-	u32 pos = pb->pos;
-	u32 *p = (u32 *)((void *)pb->mapped + pos);
-	WARN_ON(pos == pb->fence);
+	u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
+
+	WARN_ON(pb->pos == pb->fence);
 	*(p++) = op1;
 	*(p++) = op2;
-	pb->pos = (pos + 8) & (pb->size_bytes - 1);
+	pb->pos = (pb->pos + 8) & (pb->size_bytes - 1);
 }
 
 /*
@@ -134,14 +134,19 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 				     enum cdma_event event)
 {
 	for (;;) {
+		struct push_buffer *pb = &cdma->push_buffer;
 		unsigned int space;
 
-		if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
+		switch (event) {
+		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
 			space = list_empty(&cdma->sync_queue) ? 1 : 0;
-		else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
-			struct push_buffer *pb = &cdma->push_buffer;
+			break;
+
+		case CDMA_EVENT_PUSH_BUFFER_SPACE:
 			space = host1x_pushbuffer_space(pb);
-		} else {
+			break;
+
+		default:
 			WARN_ON(1);
 			return -EINVAL;
 		}
@@ -159,12 +164,14 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 			mutex_lock(&cdma->lock);
 			continue;
 		}
+
 		cdma->event = event;
 
 		mutex_unlock(&cdma->lock);
 		down(&cdma->sem);
 		mutex_lock(&cdma->lock);
 	}
+
 	return 0;
 }
 
@@ -234,6 +241,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 			/* Start timer on next pending syncpt */
 			if (job->timeout)
 				cdma_start_timer_locked(cdma, job);
+
 			break;
 		}
 
@@ -247,7 +255,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 		/* Pop push buffer slots */
 		if (job->num_slots) {
 			struct push_buffer *pb = &cdma->push_buffer;
+
 			host1x_pushbuffer_pop(pb, job->num_slots);
+
 			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
 				signal = true;
 		}
@@ -269,11 +279,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 				   struct device *dev)
 {
-	u32 restart_addr;
-	u32 syncpt_incrs;
-	struct host1x_job *job = NULL;
-	u32 syncpt_val;
 	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 restart_addr, syncpt_incrs, syncpt_val;
+	struct host1x_job *job = NULL;
 
 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
 
@@ -342,9 +350,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 		syncpt_val += syncpt_incrs;
 	}
 
-	/* The following sumbits from the same client may be dependent on the
+	/*
+	 * The following sumbits from the same client may be dependent on the
 	 * failed submit and therefore they may fail. Force a small timeout
-	 * to make the queue cleanup faster */
+	 * to make the queue cleanup faster.
+	 */
 
 	list_for_each_entry_from(job, &cdma->sync_queue, list)
 		if (job->client == cdma->timeout.client)
@@ -375,6 +385,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 	err = host1x_pushbuffer_init(&cdma->push_buffer);
 	if (err)
 		return err;
+
 	return 0;
 }
 
@@ -410,6 +421,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 		/* init state on first submit with timeout value */
 		if (!cdma->timeout.initialized) {
 			int err;
+
 			err = host1x_hw_cdma_timeout_init(host1x, cdma,
 							  job->syncpt_id);
 			if (err) {
@@ -418,6 +430,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 			}
 		}
 	}
+
 	if (!cdma->running)
 		host1x_hw_cdma_start(host1x, cdma);
 
@@ -448,6 +461,7 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
 		slots_free = host1x_cdma_wait_locked(cdma,
 						CDMA_EVENT_PUSH_BUFFER_SPACE);
 	}
+
 	cdma->slots_free = slots_free - 1;
 	cdma->slots_used++;
 	host1x_pushbuffer_push(pb, op1, op2);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index b4ae3affb987..8f437d924c10 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -83,9 +83,10 @@ EXPORT_SYMBOL(host1x_channel_put);
 struct host1x_channel *host1x_channel_request(struct device *dev)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	int max_channels = host->info->nb_channels;
+	unsigned int max_channels = host->info->nb_channels;
 	struct host1x_channel *channel = NULL;
-	int index, err;
+	unsigned long index;
+	int err;
 
 	mutex_lock(&host->chlist_mutex);
 
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index ee3d12b51c50..d9330fcc62ad 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -39,6 +39,7 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	va_start(args, fmt);
 	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
 	va_end(args);
+
 	o->fn(o->ctx, o->buf, len);
 }
 
@@ -48,13 +49,17 @@ static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
 	struct output *o = data;
 
 	mutex_lock(&ch->reflock);
+
 	if (ch->refcount) {
 		mutex_lock(&ch->cdma.lock);
+
 		if (show_fifo)
 			host1x_hw_show_channel_fifo(m, ch, o);
+
 		host1x_hw_show_channel_cdma(m, ch, o);
 		mutex_unlock(&ch->cdma.lock);
 	}
+
 	mutex_unlock(&ch->reflock);
 
 	return 0;
@@ -62,22 +67,27 @@ static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
 
 static void show_syncpts(struct host1x *m, struct output *o)
 {
-	int i;
+	unsigned int i;
+
 	host1x_debug_output(o, "---- syncpts ----\n");
+
 	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
 		u32 max = host1x_syncpt_read_max(m->syncpt + i);
 		u32 min = host1x_syncpt_load(m->syncpt + i);
+
 		if (!min && !max)
 			continue;
-		host1x_debug_output(o, "id %d (%s) min %d max %d\n",
+
+		host1x_debug_output(o, "id %u (%s) min %d max %d\n",
 				    i, m->syncpt[i].name, min, max);
 	}
 
 	for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
 		u32 base_val;
+
 		base_val = host1x_syncpt_load_wait_base(m->syncpt + i);
 		if (base_val)
-			host1x_debug_output(o, "waitbase id %d val %d\n", i,
+			host1x_debug_output(o, "waitbase id %u val %d\n", i,
 					    base_val);
 	}
 
@@ -114,7 +124,9 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
+
 	show_all(s->private, &o);
+
 	return 0;
 }
 
@@ -124,7 +136,9 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
+
 	show_all_no_fifo(s->private, &o);
+
 	return 0;
 }
 
@@ -134,10 +148,10 @@ static int host1x_debug_open_all(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations host1x_debug_all_fops = {
-	.open		= host1x_debug_open_all,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+	.open = host1x_debug_open_all,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
 };
 
 static int host1x_debug_open(struct inode *inode, struct file *file)
@@ -146,10 +160,10 @@ static int host1x_debug_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations host1x_debug_fops = {
-	.open		= host1x_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+	.open = host1x_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
 };
 
 static void host1x_debugfs_init(struct host1x *host1x)
@@ -201,6 +215,7 @@ void host1x_debug_dump(struct host1x *host1x)
 	struct output o = {
 		.fn = write_to_printk
 	};
+
 	show_all(host1x, &o);
 }
 
@@ -209,5 +224,6 @@ void host1x_debug_dump_syncpts(struct host1x *host1x)
 	struct output o = {
 		.fn = write_to_printk
 	};
+
 	show_syncpts(host1x, &o);
 }
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index ff348690df94..a62317af76ad 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -63,13 +63,13 @@ u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
 }
 
 static const struct host1x_info host1x01_info = {
-	.nb_channels	= 8,
-	.nb_pts		= 32,
-	.nb_mlocks	= 16,
-	.nb_bases	= 8,
-	.init		= host1x01_init,
-	.sync_offset	= 0x3000,
-	.dma_mask	= DMA_BIT_MASK(32),
+	.nb_channels = 8,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 8,
+	.init = host1x01_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x02_info = {
@@ -102,7 +102,7 @@ static const struct host1x_info host1x05_info = {
 	.dma_mask = DMA_BIT_MASK(34),
 };
 
-static struct of_device_id host1x_of_match[] = {
+static const struct of_device_id host1x_of_match[] = {
 	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
 	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
 	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index dace124994bb..5220510f39da 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -45,7 +45,7 @@ struct host1x_cdma_ops {
 	void (*start)(struct host1x_cdma *cdma);
 	void (*stop)(struct host1x_cdma *cdma);
 	void (*flush)(struct  host1x_cdma *cdma);
-	int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id);
+	int (*timeout_init)(struct host1x_cdma *cdma, unsigned int syncpt);
 	void (*timeout_destroy)(struct host1x_cdma *cdma);
 	void (*freeze)(struct host1x_cdma *cdma);
 	void (*resume)(struct host1x_cdma *cdma, u32 getptr);
@@ -82,21 +82,21 @@ struct host1x_intr_ops {
 	int (*init_host_sync)(struct host1x *host, u32 cpm,
 		void (*syncpt_thresh_work)(struct work_struct *work));
 	void (*set_syncpt_threshold)(
-		struct host1x *host, u32 id, u32 thresh);
-	void (*enable_syncpt_intr)(struct host1x *host, u32 id);
-	void (*disable_syncpt_intr)(struct host1x *host, u32 id);
+		struct host1x *host, unsigned int id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
+	void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
 	void (*disable_all_syncpt_intrs)(struct host1x *host);
 	int (*free_syncpt_irq)(struct host1x *host);
 };
 
 struct host1x_info {
-	int	nb_channels;		/* host1x: num channels supported */
-	int	nb_pts;			/* host1x: num syncpoints supported */
-	int	nb_bases;		/* host1x: num syncpoints supported */
-	int	nb_mlocks;		/* host1x: number of mlocks */
-	int	(*init)(struct host1x *); /* initialize per SoC ops */
-	int	sync_offset;
-	u64	dma_mask;		/* mask of addressable memory */
+	unsigned int nb_channels; /* host1x: number of channels supported */
+	unsigned int nb_pts; /* host1x: number of syncpoints supported */
+	unsigned int nb_bases; /* host1x: number of syncpoint bases supported */
+	unsigned int nb_mlocks; /* host1x: number of mlocks supported */
+	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
+	unsigned int sync_offset; /* offset of syncpoint registers */
+	u64 dma_mask; /* mask of addressable memory */
 };
 
 struct host1x {
@@ -109,7 +109,6 @@ struct host1x {
 	struct clk *clk;
 
 	struct mutex intr_mutex;
-	struct workqueue_struct *intr_wq;
 	int intr_syncpt_irq;
 
 	const struct host1x_syncpt_ops *syncpt_op;
@@ -183,19 +182,20 @@ static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
 }
 
 static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
-						       u32 id, u32 thresh)
+						       unsigned int id,
+						       u32 thresh)
 {
 	host->intr_op->set_syncpt_threshold(host, id, thresh);
 }
 
 static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host,
-						     u32 id)
+						     unsigned int id)
 {
 	host->intr_op->enable_syncpt_intr(host, id);
 }
 
 static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host,
-						      u32 id)
+						      unsigned int id)
 {
 	host->intr_op->disable_syncpt_intr(host, id);
 }
@@ -212,9 +212,9 @@ static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host)
 
 static inline int host1x_hw_channel_init(struct host1x *host,
 					 struct host1x_channel *channel,
-					 int chid)
+					 unsigned int id)
 {
-	return host->channel_op->init(channel, host, chid);
+	return host->channel_op->init(channel, host, id);
 }
 
 static inline int host1x_hw_channel_submit(struct host1x *host,
@@ -243,9 +243,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host,
 
 static inline int host1x_hw_cdma_timeout_init(struct host1x *host,
 					      struct host1x_cdma *cdma,
-					      u32 syncpt_id)
+					      unsigned int syncpt)
 {
-	return host->cdma_op->timeout_init(cdma, syncpt_id);
+	return host->cdma_op->timeout_init(cdma, syncpt);
 }
 
 static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host,
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 305ea8f3382d..659c1bbfeeba 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -41,7 +41,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 {
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct push_buffer *pb = &cdma->push_buffer;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < syncpt_incrs; i++)
 		host1x_syncpt_incr(cdma->timeout.syncpt);
@@ -58,6 +58,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 			&pb->phys, getptr);
 		getptr = (getptr + 8) & (pb->size_bytes - 1);
 	}
+
 	wmb();
 }
 
@@ -162,12 +163,14 @@ static void cdma_stop(struct host1x_cdma *cdma)
 	struct host1x_channel *ch = cdma_to_channel(cdma);
 
 	mutex_lock(&cdma->lock);
+
 	if (cdma->running) {
 		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
 		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 				 HOST1X_CHANNEL_DMACTRL);
 		cdma->running = false;
 	}
+
 	mutex_unlock(&cdma->lock);
 }
 
@@ -213,11 +216,11 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
 	u32 cmdproc_stop;
 
 	dev_dbg(host1x->dev,
-		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
+		"resuming channel (id %u, DMAGET restart = 0x%x)\n",
 		ch->id, getptr);
 
 	cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop &= ~(BIT(ch->id));
+	cmdproc_stop &= ~BIT(ch->id);
 	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
 
 	cdma->torndown = false;
@@ -231,14 +234,11 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
  */
 static void cdma_timeout_handler(struct work_struct *work)
 {
+	u32 prev_cmdproc, cmdproc_stop, syncpt_val;
 	struct host1x_cdma *cdma;
 	struct host1x *host1x;
 	struct host1x_channel *ch;
 
-	u32 syncpt_val;
-
-	u32 prev_cmdproc, cmdproc_stop;
-
 	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
 			    timeout.wq);
 	host1x = cdma_to_host1x(cdma);
@@ -277,9 +277,9 @@ static void cdma_timeout_handler(struct work_struct *work)
 		return;
 	}
 
-	dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
-		__func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
-		syncpt_val, cdma->timeout.syncpt_val);
+	dev_warn(host1x->dev, "%s: timeout: %u (%s), HW thresh %d, done %d\n",
+		 __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
+		 syncpt_val, cdma->timeout.syncpt_val);
 
 	/* stop HW, resetting channel/module */
 	host1x_hw_cdma_freeze(host1x, cdma);
@@ -291,7 +291,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 /*
  * Init timeout resources
  */
-static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id)
+static int cdma_timeout_init(struct host1x_cdma *cdma, unsigned int syncpt)
 {
 	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
 	cdma->timeout.initialized = true;
@@ -306,6 +306,7 @@ static void cdma_timeout_destroy(struct host1x_cdma *cdma)
 {
 	if (cdma->timeout.initialized)
 		cancel_delayed_work(&cdma->timeout.wq);
+
 	cdma->timeout.initialized = false;
 }
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 946c332c3906..5e8df78b7acd 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -46,6 +46,7 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 		 */
 		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
 			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+
 			offset += i * sizeof(u32);
 
 			trace_host1x_cdma_push_gather(dev_name(dev), bo,
@@ -66,6 +67,7 @@ static void submit_gathers(struct host1x_job *job)
 		struct host1x_job_gather *g = &job->gathers[i];
 		u32 op1 = host1x_opcode_gather(g->words);
 		u32 op2 = g->base + g->offset;
+
 		trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
 		host1x_cdma_push(cdma, op1, op2);
 	}
@@ -75,7 +77,8 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
 {
 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 	struct host1x_syncpt *sp = host->syncpt + job->syncpt_id;
-	u32 id, value;
+	unsigned int id;
+	u32 value;
 
 	value = host1x_syncpt_read_max(sp);
 	id = sp->base->id;
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index cc3f1825c735..7a4a3286e4a7 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,8 +40,7 @@ enum {
 
 static unsigned int show_channel_command(struct output *o, u32 val)
 {
-	unsigned mask;
-	unsigned subop;
+	unsigned int mask, subop;
 
 	switch (val >> 28) {
 	case HOST1X_OPCODE_SETCLASS:
@@ -51,12 +50,11 @@ static unsigned int show_channel_command(struct output *o, u32 val)
 					    val >> 6 & 0x3ff,
 					    val >> 16 & 0xfff, mask);
 			return hweight8(mask);
-		} else {
-			host1x_debug_output(o, "SETCL(class=%03x)\n",
-					    val >> 6 & 0x3ff);
-			return 0;
 		}
 
+		host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+		return 0;
+
 	case HOST1X_OPCODE_INCR:
 		host1x_debug_output(o, "INCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
@@ -143,7 +141,8 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 	struct host1x_job *job;
 
 	list_for_each_entry(job, &cdma->sync_queue, list) {
-		int i;
+		unsigned int i;
+
 		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
 				    job, job->syncpt_id, job->syncpt_end,
 				    job->first_get, job->timeout,
@@ -190,7 +189,7 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
 	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
 
-	host1x_debug_output(o, "%d-%s: ", ch->id, dev_name(ch->dev));
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
 
 	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
 	    !ch->cdma.push_buffer.mapped) {
@@ -200,14 +199,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 
 	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
 	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	    HOST1X_UCLASS_WAIT_SYNCPT)
+			HOST1X_UCLASS_WAIT_SYNCPT)
 		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
 				    cbread >> 24, cbread & 0xffffff);
 	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
-	   HOST1X_CLASS_HOST1X &&
-	   HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	   HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
-
+				HOST1X_CLASS_HOST1X &&
+		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
 		base = (cbread >> 16) & 0xff;
 		baseval =
 			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
@@ -236,7 +234,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
 	u32 val, rd_ptr, wr_ptr, start, end;
 	unsigned int data_count = 0;
 
-	host1x_debug_output(o, "%d: fifo:\n", ch->id);
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
 
 	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
 	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
@@ -290,20 +288,22 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
 
 static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
 {
-	int i;
+	unsigned int i;
 
 	host1x_debug_output(o, "---- mlocks ----\n");
+
 	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
 		u32 owner =
 			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
 		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by channel %d\n",
+			host1x_debug_output(o, "%u: locked by channel %u\n",
 				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
 		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by cpu\n", i);
+			host1x_debug_output(o, "%u: locked by cpu\n", i);
 		else
-			host1x_debug_output(o, "%d: unlocked\n", i);
+			host1x_debug_output(o, "%u: unlocked\n", i);
 	}
+
 	host1x_debug_output(o, "\n");
 }
 
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index e1e31e9e67cd..dacb8009a605 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -38,14 +38,14 @@ static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id)));
 
-	queue_work(host->intr_wq, &syncpt->intr.work);
+	schedule_work(&syncpt->intr.work);
 }
 
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
 	struct host1x *host = dev_id;
 	unsigned long reg;
-	int i, id;
+	unsigned int i, id;
 
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
 		reg = host1x_sync_readl(host,
@@ -62,7 +62,7 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 
 static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 {
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
 		host1x_sync_writel(host, 0xffffffffu,
@@ -72,10 +72,12 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 	}
 }
 
-static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
-	void (*syncpt_thresh_work)(struct work_struct *))
+static int
+_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
+			    void (*syncpt_thresh_work)(struct work_struct *))
 {
-	int i, err;
+	unsigned int i;
+	int err;
 
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
@@ -106,18 +108,21 @@ static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
 }
 
 static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
-	u32 id, u32 thresh)
+					      unsigned int id,
+					      u32 thresh)
 {
 	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
 }
 
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
+static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+					    unsigned int id)
 {
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(BIT_WORD(id)));
 }
 
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
+static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+					     unsigned int id)
 {
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id)));
@@ -127,8 +132,13 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
 
 static int _host1x_free_syncpt_irq(struct host1x *host)
 {
+	unsigned int i;
+
 	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-	flush_workqueue(host->intr_wq);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		cancel_work_sync(&host->syncpt[i].intr.work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 56e85395ac24..c93f74fcce72 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -26,8 +26,9 @@
  */
 static void syncpt_restore(struct host1x_syncpt *sp)
 {
+	u32 min = host1x_syncpt_read_min(sp);
 	struct host1x *host = sp->host;
-	int min = host1x_syncpt_read_min(sp);
+
 	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
 }
 
@@ -37,6 +38,7 @@ static void syncpt_restore(struct host1x_syncpt *sp)
 static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
 {
 	struct host1x *host = sp->host;
+
 	host1x_sync_writel(host, sp->base_val,
 			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
 }
@@ -47,6 +49,7 @@ static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
 static void syncpt_read_wait_base(struct host1x_syncpt *sp)
 {
 	struct host1x *host = sp->host;
+
 	sp->base_val =
 		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
 }
@@ -85,6 +88,7 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 	if (!host1x_syncpt_client_managed(sp) &&
 	    host1x_syncpt_idle(sp))
 		return -EINVAL;
+
 	host1x_sync_writel(host, BIT_MASK(sp->id),
 			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
 	wmb();
@@ -95,10 +99,10 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 /* remove a wait pointed to by patch_addr */
 static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
 {
-	u32 override = host1x_class_host_wait_syncpt(
-		HOST1X_SYNCPT_RESERVED, 0);
+	u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0);
 
 	*((u32 *)patch_addr) = override;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 2491bf82e30c..8b4fad0ab35d 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -122,18 +122,20 @@ static void action_submit_complete(struct host1x_waitlist *waiter)
 static void action_wakeup(struct host1x_waitlist *waiter)
 {
 	wait_queue_head_t *wq = waiter->data;
+
 	wake_up(wq);
 }
 
 static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
 {
 	wait_queue_head_t *wq = waiter->data;
+
 	wake_up_interruptible(wq);
 }
 
 typedef void (*action_handler)(struct host1x_waitlist *waiter);
 
-static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
+static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
 	action_submit_complete,
 	action_wakeup,
 	action_wakeup_interruptible,
@@ -209,7 +211,7 @@ static void syncpt_thresh_work(struct work_struct *work)
 				host1x_syncpt_load(host->syncpt + id));
 }
 
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
 			   enum host1x_intr_action action, void *data,
 			   struct host1x_waitlist *waiter, void **ref)
 {
@@ -254,7 +256,7 @@ int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
 	return 0;
 }
 
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref)
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
 {
 	struct host1x_waitlist *waiter = ref;
 	struct host1x_syncpt *syncpt;
@@ -277,9 +279,6 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 
 	mutex_init(&host->intr_mutex);
 	host->intr_syncpt_irq = irq_sync;
-	host->intr_wq = create_workqueue("host_syncpt");
-	if (!host->intr_wq)
-		return -ENOMEM;
 
 	for (id = 0; id < nb_pts; ++id) {
 		struct host1x_syncpt *syncpt = host->syncpt + id;
@@ -288,7 +287,7 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 		INIT_LIST_HEAD(&syncpt->intr.wait_head);
 		snprintf(syncpt->intr.thresh_irq_name,
 			 sizeof(syncpt->intr.thresh_irq_name),
-			 "host1x_sp_%02d", id);
+			 "host1x_sp_%02u", id);
 	}
 
 	host1x_intr_start(host);
@@ -299,7 +298,6 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 void host1x_intr_deinit(struct host1x *host)
 {
 	host1x_intr_stop(host);
-	destroy_workqueue(host->intr_wq);
 }
 
 void host1x_intr_start(struct host1x *host)
@@ -342,7 +340,7 @@ void host1x_intr_stop(struct host1x *host)
 		if (!list_empty(&syncpt[id].intr.wait_head)) {
 			/* output diagnostics */
 			mutex_unlock(&host->intr_mutex);
-			pr_warn("%s cannot stop syncpt intr id=%d\n",
+			pr_warn("%s cannot stop syncpt intr id=%u\n",
 				__func__, id);
 			return;
 		}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 2b8adf016a05..1370c2bb75b8 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -75,7 +75,7 @@ struct host1x_waitlist {
  *
  * This is a non-blocking api.
  */
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
 	enum host1x_intr_action action, void *data,
 	struct host1x_waitlist *waiter, void **ref);
 
@@ -84,7 +84,7 @@ int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
  * You must call this if you passed non-NULL as ref.
  * @ref the ref returned from host1x_intr_add_action()
  */
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref);
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref);
 
 /* Initialize host1x sync point interrupt */
 int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index b4515d544039..a91b7c4a6110 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -161,7 +161,7 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
 
 		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
 			dev_dbg(host->dev,
-				"drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n",
+				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
 				wait->syncpt_id, sp->name, wait->thresh,
 				host1x_syncpt_read_min(sp));
 
@@ -464,6 +464,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
+
 		size += g->words * sizeof(u32);
 	}
 
@@ -514,6 +515,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
 	for (i = 0; i < job->num_waitchk; i++) {
 		u32 syncpt_id = job->waitchk[i].syncpt_id;
+
 		if (syncpt_id < host1x_syncpt_nb_pts(host))
 			set_bit(syncpt_id, waitchk_mask);
 	}
@@ -571,14 +573,16 @@ void host1x_job_unpin(struct host1x_job *job)
 
 	for (i = 0; i < job->num_unpins; i++) {
 		struct host1x_job_unpin_data *unpin = &job->unpins[i];
+
 		host1x_bo_unpin(unpin->bo, unpin->sgt);
 		host1x_bo_put(unpin->bo);
 	}
+
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
 		dma_free_wc(job->channel->dev, job->gather_copy_size,
-		            job->gather_copy_mapped, job->gather_copy);
+			    job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 6b7fdc1e2ed0..95589328ad52 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -73,7 +73,7 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 			return NULL;
 	}
 
-	name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
+	name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id,
 			dev ? dev_name(dev) : NULL);
 	if (!name)
 		return NULL;
@@ -110,12 +110,14 @@ EXPORT_SYMBOL(host1x_syncpt_incr_max);
 void host1x_syncpt_restore(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
 		host1x_hw_syncpt_restore(host, sp_base + i);
+
 	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
 		host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+
 	wmb();
 }
 
@@ -126,7 +128,7 @@ void host1x_syncpt_restore(struct host1x *host)
 void host1x_syncpt_save(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
 		if (host1x_syncpt_client_managed(sp_base + i))
@@ -146,6 +148,7 @@ void host1x_syncpt_save(struct host1x *host)
 u32 host1x_syncpt_load(struct host1x_syncpt *sp)
 {
 	u32 val;
+
 	val = host1x_hw_syncpt_load(sp->host, sp);
 	trace_host1x_syncpt_load_min(sp->id, val);
 
@@ -157,10 +160,9 @@ u32 host1x_syncpt_load(struct host1x_syncpt *sp)
  */
 u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 {
-	u32 val;
 	host1x_hw_syncpt_load_wait_base(sp->host, sp);
-	val = sp->base_val;
-	return val;
+
+	return sp->base_val;
 }
 
 /*
@@ -179,6 +181,7 @@ EXPORT_SYMBOL(host1x_syncpt_incr);
 static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
 	host1x_hw_syncpt_load(sp->host, sp);
+
 	return host1x_syncpt_is_expired(sp, thresh);
 }
 
@@ -186,7 +189,7 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
  * Main entrypoint for syncpoint value waits.
  */
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
-			u32 *value)
+		       u32 *value)
 {
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 	void *ref;
@@ -201,6 +204,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	if (host1x_syncpt_is_expired(sp, thresh)) {
 		if (value)
 			*value = host1x_syncpt_load(sp);
+
 		return 0;
 	}
 
@@ -209,6 +213,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	if (host1x_syncpt_is_expired(sp, thresh)) {
 		if (value)
 			*value = val;
+
 		goto done;
 	}
 
@@ -239,32 +244,42 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	/* wait for the syncpoint, or timeout, or signal */
 	while (timeout) {
 		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
-		int remain = wait_event_interruptible_timeout(wq,
+		int remain;
+
+		remain = wait_event_interruptible_timeout(wq,
 				syncpt_load_min_is_expired(sp, thresh),
 				check);
 		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
 			if (value)
 				*value = host1x_syncpt_load(sp);
+
 			err = 0;
+
 			break;
 		}
+
 		if (remain < 0) {
 			err = remain;
 			break;
 		}
+
 		timeout -= check;
+
 		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
 			dev_warn(sp->host->dev,
-				"%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n",
+				"%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
 				 current->comm, sp->id, sp->name,
 				 thresh, timeout);
 
 			host1x_debug_dump_syncpts(sp->host);
+
 			if (check_count == MAX_STUCK_CHECK_COUNT)
 				host1x_debug_dump(sp->host);
+
 			check_count++;
 		}
 	}
+
 	host1x_intr_put_ref(sp->host, sp->id, ref);
 
 done:
@@ -279,7 +294,9 @@ bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
 	u32 current_val;
 	u32 future_val;
+
 	smp_rmb();
+
 	current_val = (u32)atomic_read(&sp->min_val);
 	future_val = (u32)atomic_read(&sp->max_val);
 
@@ -341,14 +358,14 @@ int host1x_syncpt_init(struct host1x *host)
 {
 	struct host1x_syncpt_base *bases;
 	struct host1x_syncpt *syncpt;
-	int i;
+	unsigned int i;
 
-	syncpt = devm_kzalloc(host->dev, sizeof(*syncpt) * host->info->nb_pts,
+	syncpt = devm_kcalloc(host->dev, host->info->nb_pts, sizeof(*syncpt),
 			      GFP_KERNEL);
 	if (!syncpt)
 		return -ENOMEM;
 
-	bases = devm_kzalloc(host->dev, sizeof(*bases) * host->info->nb_bases,
+	bases = devm_kcalloc(host->dev, host->info->nb_bases, sizeof(*bases),
 			     GFP_KERNEL);
 	if (!bases)
 		return -ENOMEM;
@@ -378,6 +395,7 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 					    unsigned long flags)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
+
 	return host1x_syncpt_alloc(host, dev, flags);
 }
 EXPORT_SYMBOL(host1x_syncpt_request);
@@ -398,8 +416,9 @@ EXPORT_SYMBOL(host1x_syncpt_free);
 
 void host1x_syncpt_deinit(struct host1x *host)
 {
-	int i;
 	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
+
 	for (i = 0; i < host->info->nb_pts; i++, sp++)
 		kfree(sp->name);
 }
@@ -407,10 +426,11 @@ void host1x_syncpt_deinit(struct host1x *host)
 /*
  * Read max. It indicates how many operations there are in queue, either in
  * channel or in a software thread.
- * */
+ */
 u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 {
 	smp_rmb();
+
 	return (u32)atomic_read(&sp->max_val);
 }
 EXPORT_SYMBOL(host1x_syncpt_read_max);
@@ -421,6 +441,7 @@ EXPORT_SYMBOL(host1x_syncpt_read_max);
 u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 {
 	smp_rmb();
+
 	return (u32)atomic_read(&sp->min_val);
 }
 EXPORT_SYMBOL(host1x_syncpt_read_min);
@@ -431,25 +452,26 @@ u32 host1x_syncpt_read(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read);
 
-int host1x_syncpt_nb_pts(struct host1x *host)
+unsigned int host1x_syncpt_nb_pts(struct host1x *host)
 {
 	return host->info->nb_pts;
 }
 
-int host1x_syncpt_nb_bases(struct host1x *host)
+unsigned int host1x_syncpt_nb_bases(struct host1x *host)
 {
 	return host->info->nb_bases;
 }
 
-int host1x_syncpt_nb_mlocks(struct host1x *host)
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 {
 	return host->info->nb_mlocks;
 }
 
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id)
+struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 {
 	if (host->info->nb_pts < id)
 		return NULL;
+
 	return host->syncpt + id;
 }
 EXPORT_SYMBOL(host1x_syncpt_get);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 9056465ecd3f..f719205105ac 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -37,7 +37,7 @@ struct host1x_syncpt_base {
 };
 
 struct host1x_syncpt {
-	int id;
+	unsigned int id;
 	atomic_t min_val;
 	atomic_t max_val;
 	u32 base_val;
@@ -58,13 +58,13 @@ int host1x_syncpt_init(struct host1x *host);
 void host1x_syncpt_deinit(struct host1x *host);
 
 /* Return number of sync point supported. */
-int host1x_syncpt_nb_pts(struct host1x *host);
+unsigned int host1x_syncpt_nb_pts(struct host1x *host);
 
 /* Return number of wait bases supported. */
-int host1x_syncpt_nb_bases(struct host1x *host);
+unsigned int host1x_syncpt_nb_bases(struct host1x *host);
 
 /* Return number of mlocks supported. */
-int host1x_syncpt_nb_mlocks(struct host1x *host);
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host);
 
 /*
  * Check sync point sanity. If max is larger than min, there have too many
diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c
index 2f29780e7c68..659475c1e44a 100644
--- a/drivers/gpu/ipu-v3/ipu-dc.c
+++ b/drivers/gpu/ipu-v3/ipu-dc.c
@@ -150,6 +150,9 @@ static void dc_write_tmpl(struct ipu_dc *dc, int word, u32 opcode, u32 operand,
 static int ipu_bus_format_to_map(u32 fmt)
 {
 	switch (fmt) {
+	default:
+		WARN_ON(1);
+		/* fall-through */
 	case MEDIA_BUS_FMT_RGB888_1X24:
 		return IPU_DC_MAP_RGB24;
 	case MEDIA_BUS_FMT_RGB565_1X16:
@@ -162,8 +165,6 @@ static int ipu_bus_format_to_map(u32 fmt)
 		return IPU_DC_MAP_LVDS666;
 	case MEDIA_BUS_FMT_BGR888_1X24:
 		return IPU_DC_MAP_BGR24;
-	default:
-		return -EINVAL;
 	}
 }
 
@@ -178,10 +179,6 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
 	dc->di = ipu_di_get_num(di);
 
 	map = ipu_bus_format_to_map(bus_format);
-	if (map < 0) {
-		dev_dbg(priv->dev, "IPU_DISP: No MAP\n");
-		return map;
-	}
 
 	/*
 	 * In interlaced mode we need more counters to create the asymmetric
diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
index 359268e3a166..a8d87ddd8a17 100644
--- a/drivers/gpu/ipu-v3/ipu-di.c
+++ b/drivers/gpu/ipu-v3/ipu-di.c
@@ -572,9 +572,6 @@ int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
 	dev_dbg(di->ipu->dev, "disp %d: panel size = %d x %d\n",
 		di->id, sig->mode.hactive, sig->mode.vactive);
 
-	if ((sig->mode.vsync_len == 0) || (sig->mode.hsync_len == 0))
-		return -EINVAL;
-
 	dev_dbg(di->ipu->dev, "Clocks: IPU %luHz DI %luHz Needed %luHz\n",
 		clk_get_rate(di->clk_ipu),
 		clk_get_rate(di->clk_di),
diff --git a/drivers/gpu/ipu-v3/ipu-dmfc.c b/drivers/gpu/ipu-v3/ipu-dmfc.c
index 837b1ec22800..42705bb5aaa3 100644
--- a/drivers/gpu/ipu-v3/ipu-dmfc.c
+++ b/drivers/gpu/ipu-v3/ipu-dmfc.c
@@ -45,17 +45,6 @@
 #define DMFC_DP_CHAN_6B_24		16
 #define DMFC_DP_CHAN_6F_29		24
 
-#define DMFC_FIFO_SIZE_64		(3 << 3)
-#define DMFC_FIFO_SIZE_128		(2 << 3)
-#define DMFC_FIFO_SIZE_256		(1 << 3)
-#define DMFC_FIFO_SIZE_512		(0 << 3)
-
-#define DMFC_SEGMENT(x)			((x & 0x7) << 0)
-#define DMFC_BURSTSIZE_128		(0 << 6)
-#define DMFC_BURSTSIZE_64		(1 << 6)
-#define DMFC_BURSTSIZE_32		(2 << 6)
-#define DMFC_BURSTSIZE_16		(3 << 6)
-
 struct dmfc_channel_data {
 	int		ipu_channel;
 	unsigned long	channel_reg;
@@ -104,9 +93,6 @@ struct ipu_dmfc_priv;
 
 struct dmfc_channel {
 	unsigned			slots;
-	unsigned			slotmask;
-	unsigned			segment;
-	int				burstsize;
 	struct ipu_soc			*ipu;
 	struct ipu_dmfc_priv		*priv;
 	const struct dmfc_channel_data	*data;
@@ -117,7 +103,6 @@ struct ipu_dmfc_priv {
 	struct device *dev;
 	struct dmfc_channel channels[DMFC_NUM_CHANNELS];
 	struct mutex mutex;
-	unsigned long bandwidth_per_slot;
 	void __iomem *base;
 	int use_count;
 };
@@ -172,184 +157,6 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc)
 }
 EXPORT_SYMBOL_GPL(ipu_dmfc_disable_channel);
 
-static int ipu_dmfc_setup_channel(struct dmfc_channel *dmfc, int slots,
-		int segment, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	u32 val, field;
-
-	dev_dbg(priv->dev,
-			"dmfc: using %d slots starting from segment %d for IPU channel %d\n",
-			slots, segment, dmfc->data->ipu_channel);
-
-	switch (slots) {
-	case 1:
-		field = DMFC_FIFO_SIZE_64;
-		break;
-	case 2:
-		field = DMFC_FIFO_SIZE_128;
-		break;
-	case 4:
-		field = DMFC_FIFO_SIZE_256;
-		break;
-	case 8:
-		field = DMFC_FIFO_SIZE_512;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	switch (burstsize) {
-	case 16:
-		field |= DMFC_BURSTSIZE_16;
-		break;
-	case 32:
-		field |= DMFC_BURSTSIZE_32;
-		break;
-	case 64:
-		field |= DMFC_BURSTSIZE_64;
-		break;
-	case 128:
-		field |= DMFC_BURSTSIZE_128;
-		break;
-	}
-
-	field |= DMFC_SEGMENT(segment);
-
-	val = readl(priv->base + dmfc->data->channel_reg);
-
-	val &= ~(0xff << dmfc->data->shift);
-	val |= field << dmfc->data->shift;
-
-	writel(val, priv->base + dmfc->data->channel_reg);
-
-	dmfc->slots = slots;
-	dmfc->segment = segment;
-	dmfc->burstsize = burstsize;
-	dmfc->slotmask = ((1 << slots) - 1) << segment;
-
-	return 0;
-}
-
-static int dmfc_bandwidth_to_slots(struct ipu_dmfc_priv *priv,
-		unsigned long bandwidth)
-{
-	int slots = 1;
-
-	while (slots * priv->bandwidth_per_slot < bandwidth)
-		slots *= 2;
-
-	return slots;
-}
-
-static int dmfc_find_slots(struct ipu_dmfc_priv *priv, int slots)
-{
-	unsigned slotmask_need, slotmask_used = 0;
-	int i, segment = 0;
-
-	slotmask_need = (1 << slots) - 1;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		slotmask_used |= priv->channels[i].slotmask;
-
-	while (slotmask_need <= 0xff) {
-		if (!(slotmask_used & slotmask_need))
-			return segment;
-
-		slotmask_need <<= 1;
-		segment++;
-	}
-
-	return -EBUSY;
-}
-
-void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int i;
-
-	dev_dbg(priv->dev, "dmfc: freeing %d slots starting from segment %d\n",
-			dmfc->slots, dmfc->segment);
-
-	mutex_lock(&priv->mutex);
-
-	if (!dmfc->slots)
-		goto out;
-
-	dmfc->slotmask = 0;
-	dmfc->slots = 0;
-	dmfc->segment = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		priv->channels[i].slotmask = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0) {
-			priv->channels[i].segment =
-				dmfc_find_slots(priv, priv->channels[i].slots);
-			priv->channels[i].slotmask =
-				((1 << priv->channels[i].slots) - 1) <<
-				priv->channels[i].segment;
-		}
-	}
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0)
-			ipu_dmfc_setup_channel(&priv->channels[i],
-					priv->channels[i].slots,
-					priv->channels[i].segment,
-					priv->channels[i].burstsize);
-	}
-out:
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_free_bandwidth);
-
-int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
-		unsigned long bandwidth_pixel_per_second, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int slots = dmfc_bandwidth_to_slots(priv, bandwidth_pixel_per_second);
-	int segment = -1, ret = 0;
-
-	dev_dbg(priv->dev, "dmfc: trying to allocate %ldMpixel/s for IPU channel %d\n",
-			bandwidth_pixel_per_second / 1000000,
-			dmfc->data->ipu_channel);
-
-	ipu_dmfc_free_bandwidth(dmfc);
-
-	mutex_lock(&priv->mutex);
-
-	if (slots > 8) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	/* For the MEM_BG channel, first try to allocate twice the slots */
-	if (dmfc->data->ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC)
-		segment = dmfc_find_slots(priv, slots * 2);
-	else if (slots < 2)
-		/* Always allocate at least 128*4 bytes (2 slots) */
-		slots = 2;
-
-	if (segment >= 0)
-		slots *= 2;
-	else
-		segment = dmfc_find_slots(priv, slots);
-	if (segment < 0) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ipu_dmfc_setup_channel(dmfc, slots, segment, burstsize);
-
-out:
-	mutex_unlock(&priv->mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_alloc_bandwidth);
-
 void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width)
 {
 	struct ipu_dmfc_priv *priv = dmfc->priv;
@@ -384,7 +191,6 @@ EXPORT_SYMBOL_GPL(ipu_dmfc_get);
 
 void ipu_dmfc_put(struct dmfc_channel *dmfc)
 {
-	ipu_dmfc_free_bandwidth(dmfc);
 }
 EXPORT_SYMBOL_GPL(ipu_dmfc_put);
 
@@ -412,20 +218,15 @@ int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
 		priv->channels[i].priv = priv;
 		priv->channels[i].ipu = ipu;
 		priv->channels[i].data = &dmfcdata[i];
-	}
-
-	writel(0x0, priv->base + DMFC_WR_CHAN);
-	writel(0x0, priv->base + DMFC_DP_CHAN);
 
-	/*
-	 * We have a total bandwidth of clkrate * 4pixel divided
-	 * into 8 slots.
-	 */
-	priv->bandwidth_per_slot = clk_get_rate(ipu_clk) * 4 / 8;
-
-	dev_dbg(dev, "dmfc: 8 slots with %ldMpixel/s bandwidth each\n",
-			priv->bandwidth_per_slot / 1000000);
+		if (dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC ||
+		    dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_FG_SYNC ||
+		    dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_DC_SYNC)
+			priv->channels[i].slots = 2;
+	}
 
+	writel(0x00000050, priv->base + DMFC_WR_CHAN);
+	writel(0x00005654, priv->base + DMFC_DP_CHAN);
 	writel(0x202020f6, priv->base + DMFC_WR_CHAN_DEF);
 	writel(0x2020f6f6, priv->base + DMFC_DP_CHAN_DEF);
 	writel(0x00000003, priv->base + DMFC_GENERAL1);
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 2df216b39cc5..5f962bfcb43c 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -52,9 +52,9 @@
  *
  * * muxed: Dual GPUs with a multiplexer chip to switch outputs between GPUs.
  * * muxless: Dual GPUs but only one of them is connected to outputs.
- * 	The other one is merely used to offload rendering, its results
- * 	are copied over PCIe into the framebuffer. On Linux this is
- * 	supported with DRI PRIME.
+ *   The other one is merely used to offload rendering, its results
+ *   are copied over PCIe into the framebuffer. On Linux this is
+ *   supported with DRI PRIME.
  *
  * Hybrid graphics started to appear in the late Naughties and were initially
  * all muxed. Newer laptops moved to a muxless architecture for cost reasons.
@@ -560,21 +560,21 @@ EXPORT_SYMBOL(vga_switcheroo_unlock_ddc);
  * * OFF: Power off the device not in use.
  * * ON: Power on the device not in use.
  * * IGD: Switch to the integrated graphics device.
- * 	Power on the integrated GPU if necessary, power off the discrete GPU.
- * 	Prerequisite is that no user space processes (e.g. Xorg, alsactl)
- * 	have opened device files of the GPUs or the audio client. If the
- * 	switch fails, the user may invoke lsof(8) or fuser(1) on /dev/dri/
- * 	and /dev/snd/controlC1 to identify processes blocking the switch.
+ *   Power on the integrated GPU if necessary, power off the discrete GPU.
+ *   Prerequisite is that no user space processes (e.g. Xorg, alsactl)
+ *   have opened device files of the GPUs or the audio client. If the
+ *   switch fails, the user may invoke lsof(8) or fuser(1) on /dev/dri/
+ *   and /dev/snd/controlC1 to identify processes blocking the switch.
  * * DIS: Switch to the discrete graphics device.
  * * DIGD: Delayed switch to the integrated graphics device.
- * 	This will perform the switch once the last user space process has
- * 	closed the device files of the GPUs and the audio client.
+ *   This will perform the switch once the last user space process has
+ *   closed the device files of the GPUs and the audio client.
  * * DDIS: Delayed switch to the discrete graphics device.
  * * MIGD: Mux-only switch to the integrated graphics device.
- * 	Does not remap console or change the power state of either gpu.
- * 	If the integrated GPU is currently off, the screen will turn black.
- * 	If it is on, the screen will show whatever happens to be in VRAM.
- * 	Either way, the user has to blindly enter the command to switch back.
+ *   Does not remap console or change the power state of either gpu.
+ *   If the integrated GPU is currently off, the screen will turn black.
+ *   If it is on, the screen will show whatever happens to be in VRAM.
+ *   Either way, the user has to blindly enter the command to switch back.
  * * MDIS: Mux-only switch to the discrete graphics device.
  *
  * For GPUs whose power state is controlled by the driver's runtime pm,
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index aad8c162a825..0cd4f7216239 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -261,7 +261,7 @@ static void elo_remove(struct hid_device *hdev)
 	struct elo_priv *priv = hid_get_drvdata(hdev);
 
 	hid_hw_stop(hdev);
-	flush_workqueue(wq);
+	cancel_delayed_work_sync(&priv->work);
 	kfree(priv);
 }
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c741f5e50a66..fb6f1f447279 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -61,6 +61,7 @@ MODULE_LICENSE("GPL");
 #define MT_QUIRK_ALWAYS_VALID		(1 << 4)
 #define MT_QUIRK_VALID_IS_INRANGE	(1 << 5)
 #define MT_QUIRK_VALID_IS_CONFIDENCE	(1 << 6)
+#define MT_QUIRK_CONFIDENCE		(1 << 7)
 #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE	(1 << 8)
 #define MT_QUIRK_NO_AREA		(1 << 9)
 #define MT_QUIRK_IGNORE_DUPLICATES	(1 << 10)
@@ -78,6 +79,7 @@ struct mt_slot {
 	__s32 contactid;	/* the device ContactID assigned to this slot */
 	bool touch_state;	/* is the touch valid? */
 	bool inrange_state;	/* is the finger in proximity of the sensor? */
+	bool confidence_state;  /* is the touch made by a finger? */
 };
 
 struct mt_class {
@@ -503,10 +505,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			return 1;
 		case HID_DG_CONFIDENCE:
 			if (cls->name == MT_CLS_WIN_8 &&
-				field->application == HID_DG_TOUCHPAD) {
-				cls->quirks &= ~MT_QUIRK_ALWAYS_VALID;
-				cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE;
-			}
+				field->application == HID_DG_TOUCHPAD)
+				cls->quirks |= MT_QUIRK_CONFIDENCE;
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_TIPSWITCH:
@@ -619,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 		return;
 
 	if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) {
+		int active;
 		int slotnum = mt_compute_slot(td, input);
 		struct mt_slot *s = &td->curdata;
 		struct input_mt *mt = input->mt;
@@ -633,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 				return;
 		}
 
+		if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE))
+			s->confidence_state = 1;
+		active = (s->touch_state || s->inrange_state) &&
+							s->confidence_state;
+
 		input_mt_slot(input, slotnum);
-		input_mt_report_slot_state(input, MT_TOOL_FINGER,
-			s->touch_state || s->inrange_state);
-		if (s->touch_state || s->inrange_state) {
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
+		if (active) {
 			/* this finger is in proximity of the sensor */
 			int wide = (s->w > s->h);
 			/* divided by two to match visual scale of touch */
@@ -701,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field,
 			td->curdata.touch_state = value;
 			break;
 		case HID_DG_CONFIDENCE:
+			if (quirks & MT_QUIRK_CONFIDENCE)
+				td->curdata.confidence_state = value;
 			if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE)
 				td->curvalid = value;
 			break;
@@ -1401,6 +1408,11 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK,
 			USB_DEVICE_ID_NOVATEK_PCT) },
 
+	/* Ntrig Panel */
+	{ .driver_data = MT_CLS_NSMU,
+		HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+			USB_VENDOR_ID_NTRIG, 0x1b05) },
+
 	/* PixArt optical touch screen */
 	{ .driver_data = MT_CLS_INRANGE_CONTACTNUMBER,
 		MT_USB_DEVICE(USB_VENDOR_ID_PIXART,
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 2f1ddca6f2e0..700145b15088 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 					goto inval;
 			} else if (uref->usage_index >= field->report_count)
 				goto inval;
-
-			else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
-				 (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
-				  uref->usage_index + uref_multi->num_values > field->report_count))
-				goto inval;
 		}
 
+		if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
+		    (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+		     uref->usage_index + uref_multi->num_values > field->report_count))
+			goto inval;
+
 		switch (cmd) {
 		case HIDIOCGUSAGE:
 			uref->value = field->value[uref->usage_index];
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c43318d3416e..2ac87d553e22 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 
 #include <linux/i8k.h>
 
@@ -66,11 +67,13 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static uint i8k_fan_mult = I8K_FAN_MULT;
 static uint i8k_pwm_mult;
 static uint i8k_fan_max = I8K_FAN_HIGH;
+static bool disallow_fan_type_call;
 
 #define I8K_HWMON_HAVE_TEMP1	(1 << 0)
 #define I8K_HWMON_HAVE_TEMP2	(1 << 1)
@@ -94,13 +97,13 @@ module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
 #if IS_ENABLED(CONFIG_I8K)
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 #endif
 
 static uint fan_mult;
@@ -235,14 +238,28 @@ static int i8k_get_fan_speed(int fan)
 /*
  * Read the fan type.
  */
-static int i8k_get_fan_type(int fan)
+static int _i8k_get_fan_type(int fan)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
 
+	if (disallow_fan_type_call)
+		return -EINVAL;
+
 	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
+static int i8k_get_fan_type(int fan)
+{
+	/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
+	static int types[2] = { INT_MIN, INT_MIN };
+
+	if (types[fan] == INT_MIN)
+		types[fan] = _i8k_get_fan_type(fan);
+
+	return types[fan];
+}
+
 /*
  * Read the fan nominal rpm for specific fan speed.
  */
@@ -387,14 +404,20 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case I8K_BIOS_VERSION:
+		if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) ||
+		    !isdigit(bios_version[2]))
+			return -EINVAL;
+
 		val = (bios_version[0] << 16) |
 				(bios_version[1] << 8) | bios_version[2];
 		break;
 
 	case I8K_MACHINE_ID:
-		memset(buff, 0, 16);
-		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-			sizeof(buff));
+		if (restricted && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		memset(buff, 0, sizeof(buff));
+		strlcpy(buff, bios_machineid, sizeof(buff));
 		break;
 
 	case I8K_FN_STATUS:
@@ -511,7 +534,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
 		   I8K_PROC_FMT,
 		   bios_version,
-		   i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		   (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
 		   cpu_temp,
 		   left_fan, right_fan, left_speed, right_speed,
 		   ac_power, fn_key);
@@ -718,6 +741,9 @@ static struct attribute *i8k_attrs[] = {
 static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
 			      int index)
 {
+	if (disallow_fan_type_call &&
+	    (index == 9 || index == 12))
+		return 0;
 	if (index >= 0 && index <= 1 &&
 	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
 		return 0;
@@ -767,13 +793,17 @@ static int __init i8k_init_hwmon(void)
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
 
-	/* First fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(0);
+	/* First fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(0);
+	if (err < 0)
+		err = i8k_get_fan_type(0);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
 
-	/* Second fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(1);
+	/* Second fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(1);
+	if (err < 0)
+		err = i8k_get_fan_type(1);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
@@ -929,12 +959,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
-static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+/*
+ * On some machines once I8K_SMM_GET_FAN_TYPE is issued then CPU fan speed
+ * randomly going up and down due to bug in Dell SMM or BIOS. Here is blacklist
+ * of affected Dell machines for which we disallow I8K_SMM_GET_FAN_TYPE call.
+ * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121
+ */
+static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = {
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8000
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8000",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -942,16 +974,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 		},
 	},
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8100
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8100",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"),
 		},
 	},
+	{
+		.ident = "Dell Inspiron 580",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
+		},
+	},
 	{ }
 };
 
@@ -966,8 +1001,7 @@ static int __init i8k_probe(void)
 	/*
 	 * Get DMI information
 	 */
-	if (!dmi_check_system(i8k_dmi_table) ||
-	    dmi_check_system(i8k_blacklist_dmi_table)) {
+	if (!dmi_check_system(i8k_dmi_table)) {
 		if (!ignore_dmi && !force)
 			return -ENODEV;
 
@@ -978,8 +1012,13 @@ static int __init i8k_probe(void)
 			i8k_get_dmi_data(DMI_BIOS_VERSION));
 	}
 
+	if (dmi_check_system(i8k_blacklist_fan_type_dmi_table))
+		disallow_fan_type_call = true;
+
 	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
 		sizeof(bios_version));
+	strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		sizeof(bios_machineid));
 
 	/*
 	 * Get SMM Dell signature
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index eb97a9241d17..15aa49d082c4 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -172,9 +172,9 @@ static void do_read_registers_on_cu(void *_data)
  */
 static int read_registers(struct fam15h_power_data *data)
 {
-	int this_cpu, ret, cpu;
 	int core, this_core;
 	cpumask_var_t mask;
+	int ret, cpu;
 
 	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
 	if (!ret)
@@ -183,7 +183,6 @@ static int read_registers(struct fam15h_power_data *data)
 	memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
 
 	get_online_cpus();
-	this_cpu = smp_processor_id();
 
 	/*
 	 * Choose the first online core of each compute unit, and then
@@ -205,12 +204,9 @@ static int read_registers(struct fam15h_power_data *data)
 		cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask);
 	}
 
-	if (cpumask_test_cpu(this_cpu, mask))
-		do_read_registers_on_cu(data);
+	on_each_cpu_mask(mask, do_read_registers_on_cu, data, true);
 
-	smp_call_function_many(mask, do_read_registers_on_cu, data, true);
 	put_online_cpus();
-
 	free_cpumask_var(mask);
 
 	return 0;
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index c9ff08dbe10c..e30a5939dc0d 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -375,7 +375,7 @@ struct lm90_data {
 	int kind;
 	u32 flags;
 
-	int update_interval;	/* in milliseconds */
+	unsigned int update_interval; /* in milliseconds */
 
 	u8 config_orig;		/* Original configuration register value */
 	u8 convrate_orig;	/* Original conversion rate register value */
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 847d1b5f2c13..688be9e060fc 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -300,13 +300,10 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
 		/*
 		 * The trace run will continue with the same allocated trace
-		 * buffer. As such zero-out the buffer so that we don't end
-		 * up with stale data.
-		 *
-		 * Since the tracer is still enabled drvdata::buf
-		 * can't be NULL.
+		 * buffer. The trace buffer is cleared in tmc_etr_enable_hw(),
+		 * so we don't have to explicitly clear it. Also, since the
+		 * tracer is still enabled drvdata::buf can't be NULL.
 		 */
-		memset(drvdata->buf, 0, drvdata->size);
 		tmc_etr_enable_hw(drvdata);
 	} else {
 		/*
@@ -315,7 +312,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 		 */
 		vaddr = drvdata->vaddr;
 		paddr = drvdata->paddr;
-		drvdata->buf = NULL;
+		drvdata->buf = drvdata->vaddr = NULL;
 	}
 
 	drvdata->reading = false;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 5443d03a1eec..d08d1ab9bba5 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -385,7 +385,6 @@ static int _coresight_build_path(struct coresight_device *csdev,
 	int i;
 	bool found = false;
 	struct coresight_node *node;
-	struct coresight_connection *conn;
 
 	/* An activated sink has been found.  Enqueue the element */
 	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -394,8 +393,9 @@ static int _coresight_build_path(struct coresight_device *csdev,
 
 	/* Not a sink - recursively explore each port found on this element */
 	for (i = 0; i < csdev->nr_outport; i++) {
-		conn = &csdev->conns[i];
-		if (_coresight_build_path(conn->child_dev, path) == 0) {
+		struct coresight_device *child_dev = csdev->conns[i].child_dev;
+
+		if (child_dev && _coresight_build_path(child_dev, path) == 0) {
 			found = true;
 			break;
 		}
@@ -425,6 +425,7 @@ out:
 struct list_head *coresight_build_path(struct coresight_device *csdev)
 {
 	struct list_head *path;
+	int rc;
 
 	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
 	if (!path)
@@ -432,9 +433,10 @@ struct list_head *coresight_build_path(struct coresight_device *csdev)
 
 	INIT_LIST_HEAD(path);
 
-	if (_coresight_build_path(csdev, path)) {
+	rc = _coresight_build_path(csdev, path);
+	if (rc) {
 		kfree(path);
-		path = NULL;
+		return ERR_PTR(rc);
 	}
 
 	return path;
@@ -507,8 +509,9 @@ int coresight_enable(struct coresight_device *csdev)
 		goto out;
 
 	path = coresight_build_path(csdev);
-	if (!path) {
+	if (IS_ERR(path)) {
 		pr_err("building path(s) failed\n");
+		ret = PTR_ERR(path);
 		goto out;
 	}
 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 64b1208bca5e..4a60ad214747 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -245,6 +245,13 @@ struct i801_priv {
 	struct platform_device *mux_pdev;
 #endif
 	struct platform_device *tco_pdev;
+
+	/*
+	 * If set to true the host controller registers are reserved for
+	 * ACPI AML use. Protected by acpi_lock.
+	 */
+	bool acpi_reserved;
+	struct mutex acpi_lock;
 };
 
 #define FEATURE_SMBUS_PEC	(1 << 0)
@@ -718,6 +725,12 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 	int ret = 0, xact = 0;
 	struct i801_priv *priv = i2c_get_adapdata(adap);
 
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved) {
+		mutex_unlock(&priv->acpi_lock);
+		return -EBUSY;
+	}
+
 	pm_runtime_get_sync(&priv->pci_dev->dev);
 
 	hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
@@ -820,6 +833,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 out:
 	pm_runtime_mark_last_busy(&priv->pci_dev->dev);
 	pm_runtime_put_autosuspend(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
 	return ret;
 }
 
@@ -1257,6 +1271,83 @@ static void i801_add_tco(struct i801_priv *priv)
 	priv->tco_pdev = pdev;
 }
 
+#ifdef CONFIG_ACPI
+static acpi_status
+i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
+		     u64 *value, void *handler_context, void *region_context)
+{
+	struct i801_priv *priv = handler_context;
+	struct pci_dev *pdev = priv->pci_dev;
+	acpi_status status;
+
+	/*
+	 * Once BIOS AML code touches the OpRegion we warn and inhibit any
+	 * further access from the driver itself. This device is now owned
+	 * by the system firmware.
+	 */
+	mutex_lock(&priv->acpi_lock);
+
+	if (!priv->acpi_reserved) {
+		priv->acpi_reserved = true;
+
+		dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
+		dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+
+		/*
+		 * BIOS is accessing the host controller so prevent it from
+		 * suspending automatically from now on.
+		 */
+		pm_runtime_get_sync(&pdev->dev);
+	}
+
+	if ((function & ACPI_IO_MASK) == ACPI_READ)
+		status = acpi_os_read_port(address, (u32 *)value, bits);
+	else
+		status = acpi_os_write_port(address, (u32)*value, bits);
+
+	mutex_unlock(&priv->acpi_lock);
+
+	return status;
+}
+
+static int i801_acpi_probe(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+	acpi_status status;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (adev) {
+		status = acpi_install_address_space_handler(adev->handle,
+				ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
+				NULL, priv);
+		if (ACPI_SUCCESS(status))
+			return 0;
+	}
+
+	return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
+}
+
+static void i801_acpi_remove(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (!adev)
+		return;
+
+	acpi_remove_address_space_handler(adev->handle,
+		ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
+
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved)
+		pm_runtime_put(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
+}
+#else
+static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
+static inline void i801_acpi_remove(struct i801_priv *priv) { }
+#endif
+
 static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	unsigned char temp;
@@ -1274,6 +1365,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	priv->adapter.dev.parent = &dev->dev;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
 	priv->adapter.retries = 3;
+	mutex_init(&priv->acpi_lock);
 
 	priv->pci_dev = dev;
 	switch (dev->device) {
@@ -1336,10 +1428,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
-	err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
-	if (err) {
+	if (i801_acpi_probe(priv))
 		return -ENODEV;
-	}
 
 	err = pcim_iomap_regions(dev, 1 << SMBBAR,
 				 dev_driver_string(&dev->dev));
@@ -1348,6 +1438,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			"Failed to request SMBus region 0x%lx-0x%Lx\n",
 			priv->smba,
 			(unsigned long long)pci_resource_end(dev, SMBBAR));
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1412,6 +1503,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = i2c_add_adapter(&priv->adapter);
 	if (err) {
 		dev_err(&dev->dev, "Failed to add SMBus adapter\n");
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1438,6 +1530,7 @@ static void i801_remove(struct pci_dev *dev)
 
 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
+	i801_acpi_remove(priv);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 
 	platform_device_unregister(priv->tco_pdev);
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index aa5f01efd826..30ae35146723 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -934,8 +934,15 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 		return result;
 
 	for (i = 0; i < length; i++) {
-		/* for the last byte TWSI_CTL_AAK must not be set */
-		if (i + 1 == length)
+		/*
+		 * For the last byte to receive TWSI_CTL_AAK must not be set.
+		 *
+		 * A special case is I2C_M_RECV_LEN where we don't know the
+		 * additional length yet. If recv_len is set we assume we're
+		 * not reading the final byte and therefore need to set
+		 * TWSI_CTL_AAK.
+		 */
+		if ((i + 1 == length) && !(recv_len && i == 0))
 			final_read = true;
 
 		/* clear iflg to allow next event */
@@ -950,12 +957,8 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 
 		data[i] = octeon_i2c_data_read(i2c);
 		if (recv_len && i == 0) {
-			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(i2c->dev,
-					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
-					__func__, data[i]);
+			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1)
 				return -EPROTO;
-			}
 			length += data[i];
 		}
 
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index cc6439ab3f71..041050edd809 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1268,6 +1268,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
 		}
 	}
 
+	idx = 0;
+
 	do {
 		if (msgs[idx].len == 0) {
 			ret = -EINVAL;
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 445398c314a3..b126dbaa47e3 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -912,7 +912,7 @@ static int tegra_i2c_probe(struct platform_device *pdev)
 	ret = tegra_i2c_init(i2c_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to initialize i2c controller");
-		goto unprepare_div_clk;
+		goto disable_div_clk;
 	}
 
 	ret = devm_request_irq(&pdev->dev, i2c_dev->irq,
diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c
index e33022e2d459..6e5fac6a5262 100644
--- a/drivers/i2c/i2c-boardinfo.c
+++ b/drivers/i2c/i2c-boardinfo.c
@@ -56,9 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num);
  * The board info passed can safely be __initdata, but be careful of embedded
  * pointers (for platform_data, functions, etc) since that won't be copied.
  */
-int __init
-i2c_register_board_info(int busnum,
-	struct i2c_board_info const *info, unsigned len)
+int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len)
 {
 	int status;
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 6773cadf7c9f..c6a90b4a9c62 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -145,7 +145,7 @@ static int i2c_mux_reg_probe_dt(struct regmux *mux,
 		mux->data.idle_in_use = true;
 
 	/* map address from "reg" if exists */
-	if (of_address_to_resource(np, 0, &res)) {
+	if (of_address_to_resource(np, 0, &res) == 0) {
 		mux->data.reg_size = resource_size(&res);
 		mux->data.reg = devm_ioremap_resource(&pdev->dev, &res);
 		if (IS_ERR(mux->data.reg))
@@ -260,6 +260,7 @@ static struct platform_driver i2c_mux_reg_driver = {
 	.remove	= i2c_mux_reg_remove,
 	.driver	= {
 		.name	= "i2c-mux-reg",
+		.of_match_table = of_match_ptr(i2c_mux_reg_of_match),
 	},
 };
 
diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
index 923f56598d4b..3a9f106787d2 100644
--- a/drivers/iio/accel/kxsd9.c
+++ b/drivers/iio/accel/kxsd9.c
@@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro)
 
 	mutex_lock(&st->buf_lock);
 	ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C);
 	st->tx[1] = (ret & ~KXSD9_FS_MASK) | i;
@@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-		if (ret)
+		if (ret < 0)
 			goto error_ret;
 		*val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
 		ret = IIO_VAL_INT_PLUS_MICRO;
diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c
index a1e642ee13d6..7fddc137e91e 100644
--- a/drivers/iio/accel/st_accel_buffer.c
+++ b/drivers/iio/accel/st_accel_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_accel_buffer_setup_ops = {
 
 int st_accel_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_accel_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index dc73f2d85e6d..4d95bfc4786c 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -741,6 +741,7 @@ static const struct iio_info accel_info = {
 static const struct iio_trigger_ops st_accel_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_ACCEL_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_ACCEL_TRIGGER_OPS (&st_accel_trigger_ops)
 #else
diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 21e19b60e2b9..2123f0ac2e2a 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -396,8 +396,8 @@ static int ad7266_probe(struct spi_device *spi)
 
 	st = iio_priv(indio_dev);
 
-	st->reg = devm_regulator_get(&spi->dev, "vref");
-	if (!IS_ERR_OR_NULL(st->reg)) {
+	st->reg = devm_regulator_get_optional(&spi->dev, "vref");
+	if (!IS_ERR(st->reg)) {
 		ret = regulator_enable(st->reg);
 		if (ret)
 			return ret;
@@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi)
 
 		st->vref_mv = ret / 1000;
 	} else {
+		/* Any other error indicates that the regulator does exist */
+		if (PTR_ERR(st->reg) != -ENODEV)
+			return PTR_ERR(st->reg);
 		/* Use internal reference */
 		st->vref_mv = 2500;
 	}
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
index c55898543a47..f1693dbebb8a 100644
--- a/drivers/iio/common/st_sensors/st_sensors_buffer.c
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -57,31 +57,20 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	s64 timestamp;
 
-	/* If we have a status register, check if this IRQ came from us */
-	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
-		u8 status;
-
-		len = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
-			   &status);
-		if (len < 0)
-			dev_err(sdata->dev, "could not read channel status\n");
-
-		/*
-		 * If this was not caused by any channels on this sensor,
-		 * return IRQ_NONE
-		 */
-		if (!(status & (u8)indio_dev->active_scan_mask[0]))
-			return IRQ_NONE;
-	}
+	/* If we do timetamping here, do it before reading the values */
+	if (sdata->hw_irq_trigger)
+		timestamp = sdata->hw_timestamp;
+	else
+		timestamp = iio_get_time_ns();
 
 	len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data);
 	if (len < 0)
 		goto st_sensors_get_buffer_element_error;
 
 	iio_push_to_buffers_with_timestamp(indio_dev, sdata->buffer_data,
-		pf->timestamp);
+					   timestamp);
 
 st_sensors_get_buffer_element_error:
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index dffe00692169..9e59c90f6a8d 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -363,6 +363,11 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
 	if (err < 0)
 		return err;
 
+	/* Disable DRDY, this might be still be enabled after reboot. */
+	err = st_sensors_set_dataready_irq(indio_dev, false);
+	if (err < 0)
+		return err;
+
 	if (sdata->current_fullscale) {
 		err = st_sensors_set_fullscale(indio_dev,
 						sdata->current_fullscale->num);
@@ -424,6 +429,9 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
 	else
 		drdy_mask = sdata->sensor_settings->drdy_irq.mask_int2;
 
+	/* Flag to the poll function that the hardware trigger is in use */
+	sdata->hw_irq_trigger = enable;
+
 	/* Enable/Disable the interrupt generator for data ready. */
 	err = st_sensors_write_data_with_mask(indio_dev,
 					sdata->sensor_settings->drdy_irq.addr,
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index da72279fcf99..296e4ff19ae8 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -17,6 +17,73 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_sensors_core.h"
 
+/**
+ * st_sensors_irq_handler() - top half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_handler(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	/* Get the time stamp as close in time as possible */
+	sdata->hw_timestamp = iio_get_time_ns();
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * st_sensors_irq_thread() - bottom half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_thread(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	int ret;
+
+	/*
+	 * If this trigger is backed by a hardware interrupt and we have a
+	 * status register, check if this IRQ came from us
+	 */
+	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
+		u8 status;
+
+		ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
+			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
+			   &status);
+		if (ret < 0) {
+			dev_err(sdata->dev, "could not read channel status\n");
+			goto out_poll;
+		}
+		/*
+		 * the lower bits of .active_scan_mask[0] is directly mapped
+		 * to the channels on the sensor: either bit 0 for
+		 * one-dimensional sensors, or e.g. x,y,z for accelerometers,
+		 * gyroscopes or magnetometers. No sensor use more than 3
+		 * channels, so cut the other status bits here.
+		 */
+		status &= 0x07;
+
+		/*
+		 * If this was not caused by any channels on this sensor,
+		 * return IRQ_NONE
+		 */
+		if (!indio_dev->active_scan_mask)
+			return IRQ_NONE;
+		if (!(status & (u8)indio_dev->active_scan_mask[0]))
+			return IRQ_NONE;
+	}
+
+out_poll:
+	/* It's our IRQ: proceed to handle the register polling */
+	iio_trigger_poll_chained(p);
+	return IRQ_HANDLED;
+}
+
 int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
 {
@@ -30,6 +97,10 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		return -ENOMEM;
 	}
 
+	iio_trigger_set_drvdata(sdata->trig, indio_dev);
+	sdata->trig->ops = trigger_ops;
+	sdata->trig->dev.parent = sdata->dev;
+
 	irq = sdata->get_irq_data_ready(indio_dev);
 	irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq));
 	/*
@@ -77,9 +148,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	    sdata->sensor_settings->drdy_irq.addr_stat_drdy)
 		irq_trig |= IRQF_SHARED;
 
-	err = request_threaded_irq(irq,
-			iio_trigger_generic_data_rdy_poll,
-			NULL,
+	/* Let's create an interrupt thread masking the hard IRQ here */
+	irq_trig |= IRQF_ONESHOT;
+
+	err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
+			st_sensors_irq_handler,
+			st_sensors_irq_thread,
 			irq_trig,
 			sdata->trig->name,
 			sdata->trig);
@@ -88,10 +162,6 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		goto iio_trigger_free;
 	}
 
-	iio_trigger_set_drvdata(sdata->trig, indio_dev);
-	sdata->trig->ops = trigger_ops;
-	sdata->trig->dev.parent = sdata->dev;
-
 	err = iio_trigger_register(sdata->trig);
 	if (err < 0) {
 		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
@@ -119,6 +189,18 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL(st_sensors_deallocate_trigger);
 
+int st_sensors_validate_device(struct iio_trigger *trig,
+			       struct iio_dev *indio_dev)
+{
+	struct iio_dev *indio = iio_trigger_get_drvdata(trig);
+
+	if (indio != indio_dev)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(st_sensors_validate_device);
+
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index e63b957c985f..f7c71da42f15 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -247,7 +247,7 @@ config MCP4922
 
 config STX104
 	tristate "Apex Embedded Systems STX104 DAC driver"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	help
 	  Say yes here to build support for the 2-channel DAC on the Apex
 	  Embedded Systems STX104 integrated analog PC/104 card. The base port
diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
index 948f600e7059..69bde5909854 100644
--- a/drivers/iio/dac/ad5592r-base.c
+++ b/drivers/iio/dac/ad5592r-base.c
@@ -525,7 +525,7 @@ static int ad5592r_alloc_channels(struct ad5592r_state *st)
 
 	device_for_each_child_node(st->dev, child) {
 		ret = fwnode_property_read_u32(child, "reg", &reg);
-		if (ret || reg > ARRAY_SIZE(st->channel_modes))
+		if (ret || reg >= ARRAY_SIZE(st->channel_modes))
 			continue;
 
 		ret = fwnode_property_read_u32(child, "adi,mode", &tmp);
diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c
index d67b17b6a7aa..a5377044e42f 100644
--- a/drivers/iio/gyro/st_gyro_buffer.c
+++ b/drivers/iio/gyro/st_gyro_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_gyro_buffer_setup_ops = {
 
 int st_gyro_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_gyro_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 52a3c87c375c..a8012955a1f6 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -409,6 +409,7 @@ static const struct iio_info gyro_info = {
 static const struct iio_trigger_ops st_gyro_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_GYRO_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_GYRO_TRIGGER_OPS (&st_gyro_trigger_ops)
 #else
diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
index 3be6d209a159..11535911a5c6 100644
--- a/drivers/iio/humidity/am2315.c
+++ b/drivers/iio/humidity/am2315.c
@@ -165,10 +165,8 @@ static irqreturn_t am2315_trigger_handler(int irq, void *p)
 	struct am2315_sensor_data sensor_data;
 
 	ret = am2315_read_data(data, &sensor_data);
-	if (ret < 0) {
-		mutex_unlock(&data->lock);
+	if (ret < 0)
 		goto err;
-	}
 
 	mutex_lock(&data->lock);
 	if (*(indio_dev->active_scan_mask) == AM2315_ALL_CHANNEL_MASK) {
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index fa4767613173..a03832a5fc95 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -55,7 +55,7 @@ static const struct {
 	},
 	{ /* IIO_HUMIDITYRELATIVE channel */
 		.shift = 8,
-		.mask = 2,
+		.mask = 3,
 	},
 };
 
@@ -164,14 +164,14 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
 		dev_err(&client->dev, "cannot read high byte measurement");
 		return ret;
 	}
-	val = ret << 6;
+	val = ret << 8;
 
 	ret = i2c_smbus_read_byte(client);
 	if (ret < 0) {
 		dev_err(&client->dev, "cannot read low byte measurement");
 		return ret;
 	}
-	val |= ret >> 2;
+	val |= ret;
 
 	return val;
 }
@@ -211,18 +211,18 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev,
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_SCALE:
 		if (chan->type == IIO_TEMP) {
-			*val = 165;
-			*val2 = 65536 >> 2;
+			*val = 165000;
+			*val2 = 65536;
 			return IIO_VAL_FRACTIONAL;
 		} else {
-			*val = 0;
-			*val2 = 10000;
-			return IIO_VAL_INT_PLUS_MICRO;
+			*val = 100;
+			*val2 = 65536;
+			return IIO_VAL_FRACTIONAL;
 		}
 		break;
 	case IIO_CHAN_INFO_OFFSET:
-		*val = -3971;
-		*val2 = 879096;
+		*val = -15887;
+		*val2 = 515151;
 		return IIO_VAL_INT_PLUS_MICRO;
 	default:
 		return -EINVAL;
diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c
index 0bf92b06d7d8..b8a290ec984e 100644
--- a/drivers/iio/imu/bmi160/bmi160_core.c
+++ b/drivers/iio/imu/bmi160/bmi160_core.c
@@ -209,11 +209,11 @@ static const struct  bmi160_scale_item bmi160_scale_table[] = {
 };
 
 static const struct bmi160_odr bmi160_accel_odr[] = {
-	{0x01, 0, 78125},
-	{0x02, 1, 5625},
-	{0x03, 3, 125},
-	{0x04, 6, 25},
-	{0x05, 12, 5},
+	{0x01, 0, 781250},
+	{0x02, 1, 562500},
+	{0x03, 3, 125000},
+	{0x04, 6, 250000},
+	{0x05, 12, 500000},
 	{0x06, 25, 0},
 	{0x07, 50, 0},
 	{0x08, 100, 0},
@@ -229,7 +229,7 @@ static const struct bmi160_odr bmi160_gyro_odr[] = {
 	{0x08, 100, 0},
 	{0x09, 200, 0},
 	{0x0A, 400, 0},
-	{0x0B, 8000, 0},
+	{0x0B, 800, 0},
 	{0x0C, 1600, 0},
 	{0x0D, 3200, 0},
 };
@@ -364,8 +364,8 @@ int bmi160_set_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
 
 	return regmap_update_bits(data->regmap,
 				  bmi160_regs[t].config,
-				  bmi160_odr_table[t].tbl[i].bits,
-				  bmi160_regs[t].config_odr_mask);
+				  bmi160_regs[t].config_odr_mask,
+				  bmi160_odr_table[t].tbl[i].bits);
 }
 
 static int bmi160_get_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
index f62b8bd9ad7e..dd6fc6d21f9d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
@@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 	int i;
 	acpi_status status;
 	union acpi_object *cpm;
+	int ret;
 
 	status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 			}
 		}
 	}
-
+	ret = cpm->package.count;
 	kfree(buffer.pointer);
 
-	return cpm->package.count;
+	return ret;
 }
 
 static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data)
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ae2806aafb72..0c52dfe64977 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -210,22 +210,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig,
 
 	/* Prevent the module from being removed whilst attached to a trigger */
 	__module_get(pf->indio_dev->info->driver_module);
+
+	/* Get irq number */
 	pf->irq = iio_trigger_get_irq(trig);
+	if (pf->irq < 0)
+		goto out_put_module;
+
+	/* Request irq */
 	ret = request_threaded_irq(pf->irq, pf->h, pf->thread,
 				   pf->type, pf->name,
 				   pf);
-	if (ret < 0) {
-		module_put(pf->indio_dev->info->driver_module);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_put_irq;
 
+	/* Enable trigger in driver */
 	if (trig->ops && trig->ops->set_trigger_state && notinuse) {
 		ret = trig->ops->set_trigger_state(trig, true);
 		if (ret < 0)
-			module_put(pf->indio_dev->info->driver_module);
+			goto out_free_irq;
 	}
 
 	return ret;
+
+out_free_irq:
+	free_irq(pf->irq, pf);
+out_put_irq:
+	iio_trigger_put_irq(trig, pf->irq);
+out_put_module:
+	module_put(pf->indio_dev->info->driver_module);
+	return ret;
 }
 
 static int iio_trigger_detach_poll_func(struct iio_trigger *trig,
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index b4dbb3912977..651d57b8abbf 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -1011,6 +1011,7 @@ static int apds9960_probe(struct i2c_client *client,
 
 	iio_device_attach_buffer(indio_dev, buffer);
 
+	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &apds9960_info;
 	indio_dev->name = APDS9960_DRV_NAME;
 	indio_dev->channels = apds9960_channels;
diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c
index 72b364e4aa72..b54dcba05a82 100644
--- a/drivers/iio/light/bh1780.c
+++ b/drivers/iio/light/bh1780.c
@@ -84,7 +84,7 @@ static int bh1780_debugfs_reg_access(struct iio_dev *indio_dev,
 	int ret;
 
 	if (!readval)
-		bh1780_write(bh1780, (u8)reg, (u8)writeval);
+		return bh1780_write(bh1780, (u8)reg, (u8)writeval);
 
 	ret = bh1780_read(bh1780, (u8)reg);
 	if (ret < 0)
@@ -187,7 +187,7 @@ static int bh1780_probe(struct i2c_client *client,
 
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &bh1780_info;
-	indio_dev->name = id->name;
+	indio_dev->name = "bh1780";
 	indio_dev->channels = bh1780_channels;
 	indio_dev->num_channels = ARRAY_SIZE(bh1780_channels);
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -226,7 +226,8 @@ static int bh1780_remove(struct i2c_client *client)
 static int bh1780_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_POFF);
@@ -241,7 +242,8 @@ static int bh1780_runtime_suspend(struct device *dev)
 static int bh1780_runtime_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_PON);
diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c
index e01e58a9bd14..f17cb2ea18f5 100644
--- a/drivers/iio/light/max44000.c
+++ b/drivers/iio/light/max44000.c
@@ -147,7 +147,6 @@ static const struct iio_chan_spec max44000_channels[] = {
 	{
 		.type = IIO_PROXIMITY,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index = MAX44000_SCAN_INDEX_PRX,
 		.scan_type = {
 			.sign		= 'u',
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index ecd3bd0a9769..0a9e8fadfa9d 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
 
 int st_magn_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_magn_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 62036d2a9956..8250fc322c56 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -572,6 +572,7 @@ static const struct iio_info magn_info = {
 static const struct iio_trigger_ops st_magn_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_MAGN_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_MAGN_TRIGGER_OPS (&st_magn_trigger_ops)
 #else
diff --git a/drivers/iio/pressure/bmp280.c b/drivers/iio/pressure/bmp280.c
index 2f1498e12bb2..724452d61846 100644
--- a/drivers/iio/pressure/bmp280.c
+++ b/drivers/iio/pressure/bmp280.c
@@ -879,8 +879,8 @@ static int bmp280_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 	if (chip_id != id->driver_data) {
-		dev_err(&client->dev, "bad chip id.  expected %x got %x\n",
-			BMP280_CHIP_ID, chip_id);
+		dev_err(&client->dev, "bad chip id.  expected %lx got %x\n",
+			id->driver_data, chip_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c
index 2ff53f222352..99468d0a64e7 100644
--- a/drivers/iio/pressure/st_pressure_buffer.c
+++ b/drivers/iio/pressure/st_pressure_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_press_buffer_setup_ops = {
 
 int st_press_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_press_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 9e9b72a8f18f..92a118c3c4ac 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -28,15 +28,21 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_pressure.h"
 
+#define MCELSIUS_PER_CELSIUS			1000
+
+/* Default pressure sensitivity */
 #define ST_PRESS_LSB_PER_MBAR			4096UL
 #define ST_PRESS_KPASCAL_NANO_SCALE		(100000000UL / \
 						 ST_PRESS_LSB_PER_MBAR)
+
+/* Default temperature sensitivity */
 #define ST_PRESS_LSB_PER_CELSIUS		480UL
-#define ST_PRESS_CELSIUS_NANO_SCALE		(1000000000UL / \
-						 ST_PRESS_LSB_PER_CELSIUS)
+#define ST_PRESS_MILLI_CELSIUS_OFFSET		42500UL
+
 #define ST_PRESS_NUMBER_DATA_CHANNELS		1
 
 /* FULLSCALE */
+#define ST_PRESS_FS_AVL_1100MB			1100
 #define ST_PRESS_FS_AVL_1260MB			1260
 
 #define ST_PRESS_1_OUT_XL_ADDR			0x28
@@ -54,9 +60,6 @@
 #define ST_PRESS_LPS331AP_PW_MASK		0x80
 #define ST_PRESS_LPS331AP_FS_ADDR		0x23
 #define ST_PRESS_LPS331AP_FS_MASK		0x30
-#define ST_PRESS_LPS331AP_FS_AVL_1260_VAL	0x00
-#define ST_PRESS_LPS331AP_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS331AP_BDU_ADDR		0x20
 #define ST_PRESS_LPS331AP_BDU_MASK		0x04
 #define ST_PRESS_LPS331AP_DRDY_IRQ_ADDR		0x22
@@ -67,9 +70,14 @@
 #define ST_PRESS_LPS331AP_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS331AP_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS331AP_MULTIREAD_BIT		true
-#define ST_PRESS_LPS331AP_TEMP_OFFSET		42500
 
 /* CUSTOM VALUES FOR LPS001WP SENSOR */
+
+/* LPS001WP pressure resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_MBAR		16UL
+/* LPS001WP temperature resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_CELSIUS	64UL
+
 #define ST_PRESS_LPS001WP_WAI_EXP		0xba
 #define ST_PRESS_LPS001WP_ODR_ADDR		0x20
 #define ST_PRESS_LPS001WP_ODR_MASK		0x30
@@ -78,6 +86,8 @@
 #define ST_PRESS_LPS001WP_ODR_AVL_13HZ_VAL	0x03
 #define ST_PRESS_LPS001WP_PW_ADDR		0x20
 #define ST_PRESS_LPS001WP_PW_MASK		0x40
+#define ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN \
+	(100000000UL / ST_PRESS_LPS001WP_LSB_PER_MBAR)
 #define ST_PRESS_LPS001WP_BDU_ADDR		0x20
 #define ST_PRESS_LPS001WP_BDU_MASK		0x04
 #define ST_PRESS_LPS001WP_MULTIREAD_BIT		true
@@ -94,11 +104,6 @@
 #define ST_PRESS_LPS25H_ODR_AVL_25HZ_VAL	0x04
 #define ST_PRESS_LPS25H_PW_ADDR			0x20
 #define ST_PRESS_LPS25H_PW_MASK			0x80
-#define ST_PRESS_LPS25H_FS_ADDR			0x00
-#define ST_PRESS_LPS25H_FS_MASK			0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_VAL		0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS25H_BDU_ADDR		0x20
 #define ST_PRESS_LPS25H_BDU_MASK		0x04
 #define ST_PRESS_LPS25H_DRDY_IRQ_ADDR		0x23
@@ -109,7 +114,6 @@
 #define ST_PRESS_LPS25H_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS25H_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS25H_MULTIREAD_BIT		true
-#define ST_PRESS_LPS25H_TEMP_OFFSET		42500
 #define ST_PRESS_LPS25H_OUT_XL_ADDR		0x28
 #define ST_TEMP_LPS25H_OUT_L_ADDR		0x2b
 
@@ -161,7 +165,9 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 			.storagebits = 16,
 			.endianness = IIO_LE,
 		},
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
+		.info_mask_separate =
+			BIT(IIO_CHAN_INFO_RAW) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	{
@@ -177,7 +183,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 		},
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_OFFSET),
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	IIO_CHAN_SOFT_TIMESTAMP(1)
@@ -212,11 +218,14 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.addr = ST_PRESS_LPS331AP_FS_ADDR,
 			.mask = ST_PRESS_LPS331AP_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS331AP datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS331AP_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS331AP_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -261,7 +270,17 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = 0,
+			.fs_avl = {
+				/*
+				 * Pressure and temperature resolution values
+				 * as defined in table 3 of LPS001WP datasheet.
+				 */
+				[0] = {
+					.num = ST_PRESS_FS_AVL_1100MB,
+					.gain = ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN,
+					.gain2 = ST_PRESS_LPS001WP_LSB_PER_CELSIUS,
+				},
+			},
 		},
 		.bdu = {
 			.addr = ST_PRESS_LPS001WP_BDU_ADDR,
@@ -298,14 +317,15 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = ST_PRESS_LPS25H_FS_ADDR,
-			.mask = ST_PRESS_LPS25H_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS25H datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS25H_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS25H_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -364,26 +384,26 @@ static int st_press_read_raw(struct iio_dev *indio_dev,
 
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
-		*val = 0;
-
 		switch (ch->type) {
 		case IIO_PRESSURE:
+			*val = 0;
 			*val2 = press_data->current_fullscale->gain;
-			break;
+			return IIO_VAL_INT_PLUS_NANO;
 		case IIO_TEMP:
+			*val = MCELSIUS_PER_CELSIUS;
 			*val2 = press_data->current_fullscale->gain2;
-			break;
+			return IIO_VAL_FRACTIONAL;
 		default:
 			err = -EINVAL;
 			goto read_error;
 		}
 
-		return IIO_VAL_INT_PLUS_NANO;
 	case IIO_CHAN_INFO_OFFSET:
 		switch (ch->type) {
 		case IIO_TEMP:
-			*val = 425;
-			*val2 = 10;
+			*val = ST_PRESS_MILLI_CELSIUS_OFFSET *
+			       press_data->current_fullscale->gain2;
+			*val2 = MCELSIUS_PER_CELSIUS;
 			break;
 		default:
 			err = -EINVAL;
@@ -425,6 +445,7 @@ static const struct iio_info press_info = {
 static const struct iio_trigger_ops st_press_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_PRESS_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_PRESS_TRIGGER_OPS (&st_press_trigger_ops)
 #else
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index f4d29d5dbd5f..e2f926cdcad2 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -64,6 +64,7 @@ struct as3935_state {
 	struct delayed_work work;
 
 	u32 tune_cap;
+	u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */
 	u8 buf[2] ____cacheline_aligned;
 };
 
@@ -72,7 +73,8 @@ static const struct iio_chan_spec as3935_channels[] = {
 		.type           = IIO_PROXIMITY,
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_PROCESSED),
+			BIT(IIO_CHAN_INFO_PROCESSED) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index     = 0,
 		.scan_type = {
 			.sign           = 'u',
@@ -181,7 +183,12 @@ static int as3935_read_raw(struct iio_dev *indio_dev,
 		/* storm out of range */
 		if (*val == AS3935_DATA_MASK)
 			return -EINVAL;
-		*val *= 1000;
+
+		if (m == IIO_CHAN_INFO_PROCESSED)
+			*val *= 1000;
+		break;
+	case IIO_CHAN_INFO_SCALE:
+		*val = 1000;
 		break;
 	default:
 		return -EINVAL;
@@ -206,10 +213,10 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
 	ret = as3935_read(st, AS3935_DATA, &val);
 	if (ret)
 		goto err_read;
-	val &= AS3935_DATA_MASK;
-	val *= 1000;
 
-	iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp);
+	st->buffer[0] = val & AS3935_DATA_MASK;
+	iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
+					   pf->timestamp);
 err_read:
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c2e257d97eff..1a2984c28b95 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -178,6 +178,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 {
 	int ret = 0;
 	struct net_device *old_net_dev;
+	enum ib_gid_type old_gid_type;
 
 	/* in rdma_cap_roce_gid_table, this funciton should be protected by a
 	 * sleep-able lock.
@@ -199,6 +200,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 	}
 
 	old_net_dev = table->data_vec[ix].attr.ndev;
+	old_gid_type = table->data_vec[ix].attr.gid_type;
 	if (old_net_dev && old_net_dev != attr->ndev)
 		dev_put(old_net_dev);
 	/* if modify_gid failed, just delete the old gid */
@@ -207,10 +209,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 		attr = &zattr;
 		table->data_vec[ix].context = NULL;
 	}
-	if (default_gid)
-		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
+
 	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
 	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
+	if (default_gid) {
+		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
+		if (action == GID_TABLE_WRITE_ACTION_DEL)
+			table->data_vec[ix].attr.gid_type = old_gid_type;
+	}
 	if (table->data_vec[ix].attr.ndev &&
 	    table->data_vec[ix].attr.ndev != old_net_dev)
 		dev_hold(table->data_vec[ix].attr.ndev);
@@ -405,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 
 	for (ix = 0; ix < table->sz; ix++)
 		if (table->data_vec[ix].attr.ndev == ndev)
-			if (!del_gid(ib_dev, port, table, ix, false))
+			if (!del_gid(ib_dev, port, table, ix,
+				     !!(table->data_vec[ix].props &
+					GID_TABLE_ENTRY_DEFAULT)))
 				deleted = true;
 
 	write_unlock_irq(&table->rwlock);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1d92e091e22e..c99525512b34 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id)
 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
 
 	/* Check if the device started its remove_one */
-	spin_lock_irq(&cm.lock);
+	spin_lock_irqsave(&cm.lock, flags);
 	if (!cm_dev->going_down) {
 		queue_delayed_work(cm.wq, &work->work, 0);
 	} else {
 		kfree(work);
 		ret = -ENODEV;
 	}
-	spin_unlock_irq(&cm.lock);
+	spin_unlock_irqrestore(&cm.lock, flags);
 
 out:
 	return ret;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0c91ba3178a..ad1b1adcf6f0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-static int cma_disable_callback(struct rdma_id_private *id_priv,
-				enum rdma_cm_state state)
-{
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != state) {
-		mutex_unlock(&id_priv->handler_mutex);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 struct rdma_cm_id *rdma_create_id(struct net *net,
 				  rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps,
@@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
+	mutex_lock(&id_priv->handler_mutex);
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
+	     id_priv->state != RDMA_CM_CONNECT) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
-		return 0;
+	     id_priv->state != RDMA_CM_DISCONNECT))
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
-	struct rdma_id_private *listen_id, *conn_id;
+	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event;
 	struct net_device *net_dev;
 	int offset, ret;
@@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		goto net_dev_put;
 	}
 
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) {
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN) {
 		ret = -ECONNABORTED;
-		goto net_dev_put;
+		goto err1;
 	}
 
 	memset(&event, 0, sizeof event);
@@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
@@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		return ret;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
 }
@@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = -ECONNABORTED;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
-		return -ECONNABORTED;
+
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN)
+		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
@@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc = multicast->context;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = 0;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
+	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
+		goto out;
 
 	if (!status)
 		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
@@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 		return 0;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return 0;
 }
@@ -3878,12 +3876,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
 		   rdma_start_port(id_priv->cma_dev->device)];
 	if (addr->sa_family == AF_INET) {
-		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
 			err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
 					    true);
-		if (!err) {
-			mc->igmp_joined = true;
-			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			if (!err)
+				mc->igmp_joined = true;
 		}
 	} else {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5516fb070344..5c155fa91eec 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -661,6 +661,9 @@ int ib_query_port(struct ib_device *device,
 	if (err || port_attr->subnet_prefix)
 		return err;
 
+	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+		return 0;
+
 	err = ib_query_gid(device, port_num, 0, &gid, NULL);
 	if (err)
 		return err;
@@ -1024,7 +1027,8 @@ static int __init ib_core_init(void)
 		goto err_mad;
 	}
 
-	if (ib_add_ibnl_clients()) {
+	ret = ib_add_ibnl_clients();
+	if (ret) {
 		pr_warn("Couldn't register ibnl clients\n");
 		goto err_sa;
 	}
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 43e3fa27102b..1c41b95cefec 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -506,7 +506,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 	if (!nlmsg_request) {
 		pr_info("%s: Could not find a matching request (seq = %u)\n",
 				 __func__, msg_seq);
-			return -EINVAL;
+		return -EINVAL;
 	}
 	pm_msg = nlmsg_request->req_buffer;
 	local_sockaddr = (struct sockaddr_storage *)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 82fb511112da..2d49228f28b2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1638,9 +1638,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
 		/* Now, check to see if there are any methods still in use */
 		if (!check_method_table(method)) {
 			/* If not, release management method table */
-			 kfree(method);
-			 class->method_table[mgmt_class] = NULL;
-			 /* Any management classes left ? */
+			kfree(method);
+			class->method_table[mgmt_class] = NULL;
+			/* Any management classes left ? */
 			if (!check_class_table(class)) {
 				/* If not, release management class table */
 				kfree(class);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 5e573bb18660..60df4f8e81be 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -530,6 +530,7 @@ static PORT_PMA_ATTR(port_xmit_data		    , 12, 32, 192);
 static PORT_PMA_ATTR(port_rcv_data		    , 13, 32, 224);
 static PORT_PMA_ATTR(port_xmit_packets		    , 14, 32, 256);
 static PORT_PMA_ATTR(port_rcv_packets		    , 15, 32, 288);
+static PORT_PMA_ATTR(port_xmit_wait		    ,  0, 32, 320);
 
 /*
  * Counters added by extended set
@@ -560,6 +561,7 @@ static struct attribute *pma_attrs[] = {
 	&port_pma_attr_port_rcv_data.attr.attr,
 	&port_pma_attr_port_xmit_packets.attr.attr,
 	&port_pma_attr_port_rcv_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	NULL
 };
 
@@ -579,6 +581,7 @@ static struct attribute *pma_attrs_ext[] = {
 	&port_pma_attr_ext_port_xmit_data.attr.attr,
 	&port_pma_attr_ext_port_rcv_data.attr.attr,
 	&port_pma_attr_ext_port_xmit_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	&port_pma_attr_ext_port_rcv_packets.attr.attr,
 	&port_pma_attr_ext_unicast_rcv_packets.attr.attr,
 	&port_pma_attr_ext_unicast_xmit_packets.attr.attr,
@@ -604,6 +607,7 @@ static struct attribute *pma_attrs_noietf[] = {
 	&port_pma_attr_ext_port_rcv_data.attr.attr,
 	&port_pma_attr_ext_port_xmit_packets.attr.attr,
 	&port_pma_attr_ext_port_rcv_packets.attr.attr,
+	&port_pma_attr_port_xmit_wait.attr.attr,
 	NULL
 };
 
@@ -889,9 +893,9 @@ static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
 static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 			   u8 port_num)
 {
-	struct attribute_group *hsag = NULL;
+	struct attribute_group *hsag;
 	struct rdma_hw_stats *stats;
-	int i = 0, ret;
+	int i, ret;
 
 	stats = device->alloc_hw_stats(device, port_num);
 
@@ -899,19 +903,22 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 		return;
 
 	if (!stats->names || stats->num_counters <= 0)
-		goto err;
+		goto err_free_stats;
 
+	/*
+	 * Two extra attribue elements here, one for the lifespan entry and
+	 * one to NULL terminate the list for the sysfs core code
+	 */
 	hsag = kzalloc(sizeof(*hsag) +
-		       // 1 extra for the lifespan config entry
-		       sizeof(void *) * (stats->num_counters + 1),
+		       sizeof(void *) * (stats->num_counters + 2),
 		       GFP_KERNEL);
 	if (!hsag)
-		return;
+		goto err_free_stats;
 
 	ret = device->get_hw_stats(device, stats, port_num,
 				   stats->num_counters);
 	if (ret != stats->num_counters)
-		goto err;
+		goto err_free_hsag;
 
 	stats->timestamp = jiffies;
 
@@ -922,10 +929,13 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 		hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
 		if (!hsag->attrs[i])
 			goto err;
+		sysfs_attr_init(hsag->attrs[i]);
 	}
 
 	/* treat an error here as non-fatal */
 	hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
+	if (hsag->attrs[i])
+		sysfs_attr_init(hsag->attrs[i]);
 
 	if (port) {
 		struct kobject *kobj = &port->kobj;
@@ -946,10 +956,12 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 	return;
 
 err:
-	kfree(stats);
 	for (; i >= 0; i--)
 		kfree(hsag->attrs[i]);
+err_free_hsag:
 	kfree(hsag);
+err_free_stats:
+	kfree(stats);
 	return;
 }
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1a8babb8ee3c..825021d1008b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	struct ib_srq			*srq = NULL;
 	struct ib_qp			*qp;
 	char				*buf;
-	struct ib_qp_init_attr		attr;
+	struct ib_qp_init_attr		attr = {};
 	struct ib_uverbs_ex_create_qp_resp resp;
 	int				ret;
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 1d7d4cf442e3..6298f54b4137 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		ah_attr->grh.dgid = sgid;
 
 		if (!rdma_cap_eth_ah(device, port_num)) {
-			ret = ib_find_cached_gid_by_port(device, &dgid,
-							 IB_GID_TYPE_IB,
-							 port_num, NULL,
-							 &gid_index);
-			if (ret)
-				return ret;
+			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+				ret = ib_find_cached_gid_by_port(device, &dgid,
+								 IB_GID_TYPE_IB,
+								 port_num, NULL,
+								 &gid_index);
+				if (ret)
+					return ret;
+			} else {
+				gid_index = 0;
+			}
 		}
 
 		ah_attr->grh.sgid_index = (u8) gid_index;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 6e7050ab9e16..14d7eeb09be6 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -300,16 +300,15 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	const struct cpumask *node_mask,
 		*proc_mask = tsk_cpus_allowed(current);
 	struct cpu_mask_set *set = &dd->affinity->proc;
-	char buf[1024];
 
 	/*
 	 * check whether process/context affinity has already
 	 * been set
 	 */
 	if (cpumask_weight(proc_mask) == 1) {
-		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
-		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
-			  current->pid, current->comm, buf);
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
+			  current->pid, current->comm,
+			  cpumask_pr_args(proc_mask));
 		/*
 		 * Mark the pre-set CPU as used. This is atomic so we don't
 		 * need the lock
@@ -318,9 +317,9 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpumask_set_cpu(cpu, &set->used);
 		goto done;
 	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
-		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
-		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
-			  current->pid, current->comm, buf);
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
+			  current->pid, current->comm,
+			  cpumask_pr_args(proc_mask));
 		goto done;
 	}
 
@@ -356,8 +355,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
 				  &dd->affinity->rcv_intr.mask :
 				  &dd->affinity->rcv_intr.used));
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
-	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
+		  cpumask_pr_args(intrs));
 
 	/*
 	 * If we don't have a NUMA node requested, preference is towards
@@ -366,18 +365,16 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	if (node == -1)
 		node = dd->node;
 	node_mask = cpumask_of_node(node);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
-	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node,
+		  cpumask_pr_args(node_mask));
 
 	/* diff will hold all unused cpus */
 	cpumask_andnot(diff, &set->mask, &set->used);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
-	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+	hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff));
 
 	/* get cpumask of available CPUs on preferred NUMA */
 	cpumask_and(mask, diff, node_mask);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
-	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+	hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask));
 
 	/*
 	 * At first, we don't want to place processes on the same
@@ -395,8 +392,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpumask_andnot(diff, &set->mask, &set->used);
 		cpumask_andnot(mask, diff, node_mask);
 	}
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
-	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+	hfi1_cdbg(PROC, "possible CPUs for process %*pbl",
+		  cpumask_pr_args(mask));
 
 	cpu = cpumask_first(mask);
 	if (cpu >= nr_cpu_ids) /* empty */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 3b876da745a1..dad4d0ebbdff 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
 static void dc_start(struct hfi1_devdata *);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
 
 /*
  * Error interrupt table entry.  This is used as input to the interrupt
@@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work)
 	}
 
 	reset_neighbor_info(ppd);
-	if (ppd->mgmt_allowed)
-		remove_full_mgmt_pkey(ppd);
 
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7070,12 +7068,16 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 			    __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	hfi1_event_pkey_change(ppd->dd, ppd->port);
 }
 
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 {
-	ppd->pkeys[2] = 0;
-	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	if (ppd->pkeys[2] != 0) {
+		ppd->pkeys[2] = 0;
+		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+		hfi1_event_pkey_change(ppd->dd, ppd->port);
+	}
 }
 
 /*
@@ -7832,8 +7834,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			 * save first 2 flits in the packet that caused
 			 * the error
 			 */
-			 dd->err_info_rcvport.packet_flit1 = hdr0;
-			 dd->err_info_rcvport.packet_flit2 = hdr1;
+			dd->err_info_rcvport.packet_flit1 = hdr0;
+			dd->err_info_rcvport.packet_flit2 = hdr1;
 		}
 		switch (info) {
 		case 1:
@@ -9168,6 +9170,13 @@ int start_link(struct hfi1_pportdata *ppd)
 		return 0;
 	}
 
+	/*
+	 * FULL_MGMT_P_KEY is cleared from the pkey table, so that the
+	 * pkey table can be configured properly if the HFI unit is connected
+	 * to switch port with MgmtAllowed=NO
+	 */
+	clear_full_mgmt_pkey(ppd);
+
 	return set_link_state(ppd, HLS_DN_POLL);
 }
 
@@ -9777,7 +9786,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
-	int i;
+	int i, j;
 	u32 thres;
 
 	for (i = 0; i < ppd->vls_supported; i++) {
@@ -9801,7 +9810,10 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 			    sc_mtu_to_threshold(dd->vld[i].sc,
 						dd->vld[i].mtu,
 						dd->rcd[0]->rcvhdrqentsize));
-		sc_set_cr_threshold(dd->vld[i].sc, thres);
+		for (j = 0; j < INIT_SC_PER_VL; j++)
+			sc_set_cr_threshold(
+					pio_select_send_context_vl(dd, j, i),
+					    thres);
 	}
 	thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
 		    sc_mtu_to_threshold(dd->vld[15].sc,
@@ -11906,7 +11918,7 @@ static void update_synth_timer(unsigned long opaque)
 		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
 	}
 
-mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
+	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
@@ -14101,8 +14113,14 @@ static int init_asic_data(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
 	struct hfi1_devdata *tmp, *peer = NULL;
+	struct hfi1_asic_data *asic_data;
 	int ret = 0;
 
+	/* pre-allocate the asic structure in case we are the first device */
+	asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+	if (!asic_data)
+		return -ENOMEM;
+
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	/* Find our peer device */
 	list_for_each_entry(tmp, &hfi1_dev_list, list) {
@@ -14114,18 +14132,14 @@ static int init_asic_data(struct hfi1_devdata *dd)
 	}
 
 	if (peer) {
+		/* use already allocated structure */
 		dd->asic_data = peer->asic_data;
+		kfree(asic_data);
 	} else {
-		dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
-		if (!dd->asic_data) {
-			ret = -ENOMEM;
-			goto done;
-		}
+		dd->asic_data = asic_data;
 		mutex_init(&dd->asic_data->asic_resource_mutex);
 	}
 	dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
-
-done:
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 	return ret;
 }
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7a5b0e676cc7..c702a009608f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 
 	switch (cmd) {
 	case HFI1_IOCTL_ASSIGN_CTXT:
+		if (uctxt)
+			return -EINVAL;
+
 		if (copy_from_user(&uinfo,
 				   (struct hfi1_user_info __user *)arg,
 				   sizeof(uinfo)))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 5cc492e5776d..eed971ccd2a1 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1337,7 +1337,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
 				  (void *)dd->rcvhdrtail_dummy_kvaddr,
 				  dd->rcvhdrtail_dummy_physaddr);
-				  dd->rcvhdrtail_dummy_kvaddr = NULL;
+		dd->rcvhdrtail_dummy_kvaddr = NULL;
 	}
 
 	for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) {
@@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = NULL;
+	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 219029576ba0..fca07a1d6c28 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp)
 	memset(data, 0, size);
 }
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+{
+	struct ib_event event;
+
+	event.event = IB_EVENT_PKEY_CHANGE;
+	event.device = &dd->verbs_dev.rdi.ibdev;
+	event.element.port_num = port;
+	ib_dispatch_event(&event);
+}
+
 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 {
 	struct ib_mad_send_buf *send_buf;
@@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 	}
 
 	if (changed) {
-		struct ib_event event;
-
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.rdi.ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
+		hfi1_event_pkey_change(dd, port);
 	}
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..8b734aaae88a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -434,4 +434,6 @@ struct sc2vlnt {
 		    COUNTER_MASK(1, 3) | \
 		    COUNTER_MASK(1, 4))
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+
 #endif				/* _HFI1_MAD_H */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d5edb1afbb8f..d4022450b73f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		/* counter is reset if occupancy count changes */
 		if (reg != reg_prev)
 			loop = 0;
-		if (loop > 500) {
+		if (loop > 50000) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
 				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
@@ -1798,6 +1798,21 @@ static void pio_map_rcu_callback(struct rcu_head *list)
 }
 
 /*
+ * Set credit return threshold for the kernel send context
+ */
+static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
+{
+	u32 thres;
+
+	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
+					    50),
+		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
+					dd->vld[i].mtu,
+					dd->rcd[0]->rcvhdrqentsize));
+	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
+}
+
+/*
  * pio_map_init - called when #vls change
  * @dd: hfi1_devdata
  * @port: port number
@@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
 			if (!newmap->map[i])
 				goto bail;
 			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
-			/* assign send contexts */
+			/*
+			 * assign send contexts and
+			 * adjust credit return threshold
+			 */
 			for (j = 0; j < sz; j++) {
-				if (dd->kernel_send_context[scontext])
+				if (dd->kernel_send_context[scontext]) {
 					newmap->map[i]->ksc[j] =
 					dd->kernel_send_context[scontext];
+					set_threshold(dd, scontext, i);
+				}
 				if (++scontext >= first_scontext +
 						  vl_scontexts[i])
 					/* wrap back to first send context */
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..9fb561682c66 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 	if (ppd->qsfp_info.cache_valid) {
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
-			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
+			snprintf(lenstr, sizeof(lenstr), "%dM ",
+				 cache[QSFP_MOD_LEN_OFFS]);
 
 		power_byte = cache[QSFP_MOD_PWR_OFFS];
 		sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 79b2952c0dfb..4cfb13771897 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -214,19 +214,6 @@ const char *print_u32_array(
 	return ret;
 }
 
-const char *print_u64_array(
-	struct trace_seq *p,
-	u64 *arr, int len)
-{
-	int i;
-	const char *ret = trace_seq_buffer_ptr(p);
-
-	for (i = 0; i < len; i++)
-		trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]);
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
 __hfi1_trace_fn(PKT);
 __hfi1_trace_fn(PROC);
 __hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1e503ad0bebb..be91f6fa1c87 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	u32 tlen = packet->tlen;
 	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
-	bool sc4_bit = has_sc4_bit(packet);
-	u8 sc;
+	u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
 	u32 bth1;
 	int is_mcast;
 	struct ib_grh *grh = NULL;
@@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		 */
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-		u8 sl, sc5;
+		u8 sl;
 
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 		sl = ibp->sc_to_sl[sc5];
 
 		process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);
@@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 
 	if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
 		u16 slid = be16_to_cpu(hdr->lrh[3]);
-		u8 sc5;
-
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 
 		return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);
 	}
@@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		if (qp->ibqp.qp_num > 1) {
 			struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 			u16 slid;
-			u8 sc5;
-
-			sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-			sc5 |= sc4_bit;
 
 			slid = be16_to_cpu(hdr->lrh[3]);
 			if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
@@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		/* Received on QP0, and so by definition, this is an SMP */
 		struct opa_smp *smp = (struct opa_smp *)data;
 		u16 slid = be16_to_cpu(hdr->lrh[3]);
-		u8 sc5;
-
-		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-		sc5 |= sc4_bit;
 
 		if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))
 			goto drop;
@@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	}
 
 	wc.slid = be16_to_cpu(hdr->lrh[3]);
-	sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
-	sc |= sc4_bit;
-	wc.sl = ibp->sc_to_sl[sc];
+	wc.sl = ibp->sc_to_sl[sc5];
 
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 29f4795f866c..47ffd273ecbd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -183,7 +183,7 @@ struct user_sdma_iovec {
 	struct sdma_mmu_node *node;
 };
 
-#define SDMA_CACHE_NODE_EVICT BIT(0)
+#define SDMA_CACHE_NODE_EVICT 0
 
 struct sdma_mmu_node {
 	struct mmu_rb_node rb;
@@ -1355,11 +1355,11 @@ static int set_txreq_header(struct user_sdma_request *req,
 		 */
 		SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
 			 req->tidoffset, req->tidoffset / req->omfactor,
-			 !!(req->omfactor - KDETH_OM_SMALL));
+			 req->omfactor != KDETH_OM_SMALL);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
 			  req->tidoffset / req->omfactor);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-			  !!(req->omfactor - KDETH_OM_SMALL));
+			  req->omfactor != KDETH_OM_SMALL);
 	}
 done:
 	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..d8fb056526f8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
+	__must_hold(&qp->s_lock)
 {
 	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
 	write_seqlock(&dev->iowait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct hfi1_qp_priv *priv;
@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	}
 out:
 	write_sequnlock(&dev->iowait_lock);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..a1d6e0807f97 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 
 static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 					    struct rvt_qp *qp)
+	__must_hold(&qp->slock)
 {
 	struct verbs_txreq *tx;
 	struct hfi1_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8b9532034558..b738acdb9b02 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -113,6 +113,8 @@
 
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
 #define IW_CFG_FPM_QP_COUNT		32768
+#define I40IW_MAX_PAGES_PER_FMR		512
+#define I40IW_MIN_PAGES_PER_FMR		1
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index c963cad92f5a..6e9081380a27 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -600,8 +600,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
 	cqp_init_info.scratch_array = cqp->scratch_array;
 	status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
 	if (status) {
-		i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n",
-			     status, maj_err, min_err);
+		i40iw_pr_err("cqp init status %d\n", status);
 		goto exit;
 	}
 	status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 02a735b64208..283b64c942ee 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
 	props->atomic_cap = IB_ATOMIC_NONE;
 	props->max_map_per_fmr = 1;
+	props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
 	return 0;
 }
 
@@ -1473,6 +1474,7 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr
 	info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
 	info->pd_id = iwpd->sc_pd.pd_id;
 	info->total_len = iwmr->length;
+	info->remote_access = true;
 	cqp_info->cqp_cmd = OP_ALLOC_STAG;
 	cqp_info->post_sq = 1;
 	cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
@@ -1527,7 +1529,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
 	mutex_lock(&iwdev->pbl_mutex);
 	status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
 	mutex_unlock(&iwdev->pbl_mutex);
-	if (!status)
+	if (status)
 		goto err1;
 
 	if (palloc->level != I40IW_LEVEL_1)
@@ -2149,6 +2151,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
 			struct i40iw_fast_reg_stag_info info;
 
+			memset(&info, 0, sizeof(info));
 			info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
 			info.access_rights |= i40iw_get_user_access(flags);
 			info.stag_key = reg_wr(ib_wr)->key & 0xff;
@@ -2158,10 +2161,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
 			info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
 			info.total_len = iwmr->ibmr.length;
+			info.reg_addr_pa = *(u64 *)palloc->level1.addr;
 			info.first_pm_pbl_index = palloc->level1.idx;
 			info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
 			info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
 
+			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
+				info.chunk_size = 1;
+
 			if (page_shift == 21)
 				info.page_size = 1; /* 2M page */
 
@@ -2327,13 +2334,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq,
 {
 	struct i40iw_cq *iwcq;
 	struct i40iw_cq_uk *ukcq;
-	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+	unsigned long flags;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
 
 	iwcq = (struct i40iw_cq *)ibcq;
 	ukcq = &iwcq->sc_cq.cq_uk;
-	if (notify_flags == IB_CQ_NEXT_COMP)
-		cq_notify = IW_CQ_COMPL_EVENT;
+	if (notify_flags == IB_CQ_SOLICITED)
+		cq_notify = IW_CQ_COMPL_SOLICITED;
+	spin_lock_irqsave(&iwcq->lock, flags);
 	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	spin_unlock_irqrestore(&iwcq->lock, flags);
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 105246fba2e7..5fc623362731 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 
 	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.ib.g_slid  = ah_attr->src_path_bits;
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ah->av.ib.g_slid   |= 0x80;
 		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.ib.stat_rate;
 	}
-	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 
 	return &ah->ibah;
 }
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d68f506c1922..9c2e53d28f98 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 		tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
 	spin_unlock(&tun_qp->tx_lock);
 	if (ret)
-		goto out;
+		goto end;
 
 	tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
 	if (tun_qp->tx_ring[tun_tx_ix].ah)
@@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	wr.wr.send_flags = IB_SEND_SIGNALED;
 
 	ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
-out:
-	if (ret)
-		ib_destroy_ah(ah);
+	if (!ret)
+		return 0;
+ out:
+	spin_lock(&tun_qp->tx_lock);
+	tun_qp->tx_ix_tail++;
+	spin_unlock(&tun_qp->tx_lock);
+	tun_qp->tx_ring[tun_tx_ix].ah = NULL;
+end:
+	ib_destroy_ah(ah);
 	return ret;
 }
 
@@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
 
 	ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
+	if (!ret)
+		return 0;
+
+	spin_lock(&sqp->tx_lock);
+	sqp->tx_ix_tail++;
+	spin_unlock(&sqp->tx_lock);
+	sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-	if (ret)
-		ib_destroy_ah(ah);
+	ib_destroy_ah(ah);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b01ef6eee6e8..42a46078d7d5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -505,9 +505,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 		else
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
-	if (dev->steering_support ==  MLX4_STEERING_MODE_DEVICE_MANAGED)
-		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 	}
+	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 
 	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 
@@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
+	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
+		return ERR_PTR(-EINVAL);
+
 	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6c5ac5d8f32f..29acda249612 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -139,7 +139,7 @@ struct mlx4_ib_mr {
 	u32			max_pages;
 	struct mlx4_mr		mmr;
 	struct ib_umem	       *umem;
-	void			*pages_alloc;
+	size_t			page_map_size;
 };
 
 struct mlx4_ib_mw {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 631272172a0b..5d73989d9771 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 		      struct mlx4_ib_mr *mr,
 		      int max_pages)
 {
-	int size = max_pages * sizeof(u64);
-	int add_size;
 	int ret;
 
-	add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+	/* Ensure that size is aligned to DMA cacheline
+	 * requirements.
+	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
+	 * so page_map_size will never cross PAGE_SIZE.
+	 */
+	mr->page_map_size = roundup(max_pages * sizeof(u64),
+				    MLX4_MR_PAGES_ALIGN);
 
-	mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
-	if (!mr->pages_alloc)
+	/* Prevent cross page boundary allocation. */
+	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
+	if (!mr->pages)
 		return -ENOMEM;
 
-	mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
-
 	mr->page_map = dma_map_single(device->dma_device, mr->pages,
-				      size, DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	if (dma_mapping_error(device->dma_device, mr->page_map)) {
 		ret = -ENOMEM;
@@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 	}
 
 	return 0;
-err:
-	kfree(mr->pages_alloc);
 
+err:
+	free_page((unsigned long)mr->pages);
 	return ret;
 }
 
@@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
 {
 	if (mr->pages) {
 		struct ib_device *device = mr->ibmr.device;
-		int size = mr->max_pages * sizeof(u64);
 
 		dma_unmap_single(device->dma_device, mr->page_map,
-				 size, DMA_TO_DEVICE);
-		kfree(mr->pages_alloc);
+				 mr->page_map_size, DMA_TO_DEVICE);
+		free_page((unsigned long)mr->pages);
 		mr->pages = NULL;
 	}
 }
@@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 	mr->npages = 0;
 
 	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
-				   sizeof(u64) * mr->max_pages,
-				   DMA_TO_DEVICE);
+				   mr->page_map_size, DMA_TO_DEVICE);
 
 	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
 
 	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
-				      sizeof(u64) * mr->max_pages,
-				      DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	return rc;
 }
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 81b0e1fbec1d..8db8405c1e99 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_RC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_atomic_seg) +
+			sizeof (struct mlx4_wqe_masked_atomic_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_SMI:
 	case MLX4_IB_QPT_GSI:
@@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
 				       udata, 0, &qp, gfp);
-		if (err)
+		if (err) {
+			kfree(qp);
 			return ERR_PTR(err);
+		}
 
 		qp->ibqp.qp_num = qp->mqp.qpn;
 		qp->xrcdn = xrcdn;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index dabcc65bd65e..9c0e67bd2ba7 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -822,7 +822,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 	int eqn;
 	int err;
 
-	if (entries < 0)
+	if (entries < 0 ||
+	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
 		return ERR_PTR(-EINVAL);
 
 	if (check_cq_create_flags(attr->flags))
@@ -1168,11 +1169,16 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		return -ENOSYS;
 	}
 
-	if (entries < 1)
+	if (entries < 1 ||
+	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
+		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
+			     entries,
+			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
 		return -EINVAL;
+	}
 
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries >  (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
+	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
 		return -EINVAL;
 
 	if (entries == ibcq->cqe + 1)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1534af113058..364aab9f3c9e 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
 	pma_cnt_ext->port_xmit_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
 					 transmitted_ib_multicast.octets) >> 2);
-	pma_cnt_ext->port_xmit_data =
+	pma_cnt_ext->port_rcv_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
 					 received_ib_multicast.octets) >> 2);
 	pma_cnt_ext->port_xmit_packets =
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c72797cd9e4f..b48ad85315dc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -524,6 +524,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	    MLX5_CAP_ETH(dev->mdev, scatter_fcs))
 		props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
 
+	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
+		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
+
 	props->vendor_part_id	   = mdev->pdev->device;
 	props->hw_ver		   = mdev->pdev->revision;
 
@@ -915,7 +918,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
 	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
-	resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
+	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
+		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
 	resp.cache_line_size = L1_CACHE_BYTES;
 	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
 	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
@@ -988,7 +992,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (field_avail(typeof(resp), cqe_version, udata->outlen))
 		resp.response_length += sizeof(resp.cqe_version);
 
-	if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+	/*
+	 * We don't want to expose information from the PCI bar that is located
+	 * after 4096 bytes, so if the arch only supports larger pages, let's
+	 * pretend we don't support reading the HCA's core clock. This is also
+	 * forced by mmap function.
+	 */
+	if (PAGE_SIZE <= 4096 &&
+	    field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
 		resp.comp_mask |=
 			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
 		resp.hca_core_clock_offset =
@@ -1798,7 +1809,7 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
-	return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
+	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
 		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 }
 
@@ -1866,14 +1877,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 		break;
 
 	case MLX5_DEV_EVENT_PORT_DOWN:
+	case MLX5_DEV_EVENT_PORT_INITIALIZED:
 		ibev.event = IB_EVENT_PORT_ERR;
 		port = (u8)param;
 		break;
 
-	case MLX5_DEV_EVENT_PORT_INITIALIZED:
-		/* not used by ULPs */
-		return;
-
 	case MLX5_DEV_EVENT_LID_CHANGE:
 		ibev.event = IB_EVENT_LID_CHANGE;
 		port = (u8)param;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 504117657d41..ce0a7ab35a22 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -235,6 +235,8 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 		qp->rq.max_gs = 0;
 		qp->rq.wqe_cnt = 0;
 		qp->rq.wqe_shift = 0;
+		cap->max_recv_wr = 0;
+		cap->max_recv_sge = 0;
 	} else {
 		if (ucmd) {
 			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
@@ -1851,13 +1853,15 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			 const struct ib_ah_attr *ah,
 			 struct mlx5_qp_path *path, u8 port, int attr_mask,
-			 u32 path_flags, const struct ib_qp_attr *attr)
+			 u32 path_flags, const struct ib_qp_attr *attr,
+			 bool alt)
 {
 	enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
 	int err;
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		path->pkey_index = attr->pkey_index;
+		path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
+						     attr->pkey_index);
 
 	if (ah->ah_flags & IB_AH_GRH) {
 		if (ah->grh.sgid_index >=
@@ -1877,9 +1881,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 							  ah->grh.sgid_index);
 		path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
 	} else {
-		path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
-		path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
-									0;
+		path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+		path->fl_free_ar |=
+			(path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
 		path->rlid = cpu_to_be16(ah->dlid);
 		path->grh_mlid = ah->src_path_bits & 0x7f;
 		if (ah->ah_flags & IB_AH_GRH)
@@ -1903,7 +1907,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	path->port = port;
 
 	if (attr_mask & IB_QP_TIMEOUT)
-		path->ackto_lt = attr->timeout << 3;
+		path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
 		return modify_raw_packet_eth_prio(dev->mdev,
@@ -2264,7 +2268,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		context->pri_path.pkey_index = attr->pkey_index;
+		context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
 
 	/* todo implement counter_index functionality */
 
@@ -2277,7 +2281,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_AV) {
 		err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
 				    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
-				    attr_mask, 0, attr);
+				    attr_mask, 0, attr, false);
 		if (err)
 			goto out;
 	}
@@ -2288,7 +2292,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_ALT_PATH) {
 		err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
 				    &context->alt_path,
-				    attr->alt_port_num, attr_mask, 0, attr);
+				    attr->alt_port_num,
+				    attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
+				    0, attr, true);
 		if (err)
 			goto out;
 	}
@@ -3326,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
 			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
 		else
 			return fence;
-
-	} else {
-		return 0;
+	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
+		return MLX5_FENCE_MODE_FENCE;
 	}
+
+	return 0;
 }
 
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
@@ -4013,11 +4020,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
 		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
-		qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
+		qp_attr->alt_pkey_index =
+			be16_to_cpu(context->alt_path.pkey_index);
 		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
+	qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
 	qp_attr->port_num = context->pri_path.port;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
@@ -4079,17 +4087,19 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
 
 	if (!ibqp->uobject) {
-		qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
+		qp_attr->cap.max_send_wr  = qp->sq.max_post;
 		qp_attr->cap.max_send_sge = qp->sq.max_gs;
+		qp_init_attr->qp_context = ibqp->qp_context;
 	} else {
 		qp_attr->cap.max_send_wr  = 0;
 		qp_attr->cap.max_send_sge = 0;
 	}
 
-	/* We don't support inline sends for kernel QPs (yet), and we
-	 * don't know what userspace's value should be.
-	 */
-	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->qp_type = ibqp->qp_type;
+	qp_init_attr->recv_cq = ibqp->recv_cq;
+	qp_init_attr->send_cq = ibqp->send_cq;
+	qp_init_attr->srq = ibqp->srq;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
 
 	qp_init_attr->cap	     = qp_attr->cap;
 
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index ff946d5f59e4..382466a90da7 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 
 	switch (cmd.type) {
 	case QIB_CMD_ASSIGN_CTXT:
+		if (rcd) {
+			ret = -EINVAL;
+			goto bail;
+		}
+
 		ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
 		if (ret)
 			goto bail;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 7209fbc03ccb..a0b6ebee4d8a 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -36,7 +36,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
-#include <linux/dma-attrs.h>
 #include <linux/iommu.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
@@ -112,10 +111,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	int i;
 	int flags;
 	dma_addr_t pa;
-	DEFINE_DMA_ATTRS(attrs);
-
-	if (dmasync)
-		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
 	if (!can_do_mlock())
 		return -EPERM;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5fa4d4d81ee0..41ba7e9cadaa 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 			/* wrap to first map page, invert bit 0 */
 			offset = qpt->incr | ((offset & 1) ^ 1);
 		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		/* there can be no set bits in low-order QoS bits */
+		WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
@@ -502,6 +502,12 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
  */
 static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		  enum ib_qp_type type)
+	__releases(&qp->s_lock)
+	__releases(&qp->s_hlock)
+	__releases(&qp->r_lock)
+	__acquires(&qp->r_lock)
+	__acquires(&qp->s_hlock)
+	__acquires(&qp->s_lock)
 {
 	if (qp->state != IB_QPS_RESET) {
 		qp->state = IB_QPS_RESET;
@@ -570,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_mig_state = IB_MIG_MIGRATED;
-	if (qp->s_ack_queue)
-		memset(
-			qp->s_ack_queue,
-			0,
-			rvt_max_atomic(rdi) *
-				sizeof(*qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
@@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 * initialization that is needed.
 		 */
 		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
-		if (!priv)
+		if (IS_ERR(priv)) {
+			ret = priv;
 			goto bail_qp;
+		}
 		qp->priv = priv;
 		qp->timeout_jiffies =
 			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index e1cc2cc42f25..30c4fda7a05a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 			    !rdi->driver_f.quiesce_qp ||
 			    !rdi->driver_f.notify_error_qp ||
 			    !rdi->driver_f.mtu_from_qp ||
-			    !rdi->driver_f.mtu_to_path_mtu ||
-			    !rdi->driver_f.shut_down_port ||
-			    !rdi->driver_f.cap_mask_chg)
+			    !rdi->driver_f.mtu_to_path_mtu)
 				return -EINVAL;
 		break;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bab7db6fa9ab..4f7d9b48df64 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -94,6 +94,7 @@ enum {
 	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 	IPOIB_FLAG_DEV_ADDR_SET	  = 13,
 	IPOIB_FLAG_DEV_ADDR_CTRL  = 14,
+	IPOIB_FLAG_GOING_DOWN	  = 15,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index b2f42835d76d..951d9abcca8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1486,6 +1486,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 {
 	struct net_device *dev = to_net_dev(d);
 	int ret;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
+		return -EPERM;
 
 	if (!rtnl_trylock())
 		return restart_syscall();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 45c40a17d6a6..dc6d241b9406 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1015,7 +1015,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
 		return false;
 
-	netif_addr_lock(priv->dev);
+	netif_addr_lock_bh(priv->dev);
 
 	/* The subnet prefix may have changed, update it now so we won't have
 	 * to do it later
@@ -1026,12 +1026,12 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 
 	search_gid.global.interface_id = priv->local_gid.global.interface_id;
 
-	netif_addr_unlock(priv->dev);
+	netif_addr_unlock_bh(priv->dev);
 
 	err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
 			  priv->dev, &port, &index);
 
-	netif_addr_lock(priv->dev);
+	netif_addr_lock_bh(priv->dev);
 
 	if (search_gid.global.interface_id !=
 	    priv->local_gid.global.interface_id)
@@ -1092,7 +1092,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	}
 
 out:
-	netif_addr_unlock(priv->dev);
+	netif_addr_unlock_bh(priv->dev);
 
 	return ret;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2d7c16346648..5f58c41ef787 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1206,7 +1206,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
 				neigh = NULL;
 				goto out_unlock;
 			}
-			neigh->alive = jiffies;
+
+			if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE))
+				neigh->alive = jiffies;
 			goto out_unlock;
 		}
 	}
@@ -1851,7 +1853,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
 	struct ipoib_dev_priv *child_priv;
 	struct net_device *netdev = priv->dev;
 
-	netif_addr_lock(netdev);
+	netif_addr_lock_bh(netdev);
 
 	memcpy(&priv->local_gid.global.interface_id,
 	       &gid->global.interface_id,
@@ -1859,7 +1861,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
 	memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
 	clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
-	netif_addr_unlock(netdev);
+	netif_addr_unlock_bh(netdev);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		down_read(&priv->vlan_rwsem);
@@ -1875,7 +1877,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 	union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
 	int ret = 0;
 
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	/* Make sure the QPN, reserved and subnet prefix match the current
 	 * lladdr, it also makes sure the lladdr is unicast.
@@ -1885,7 +1887,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 	    gid->global.interface_id == 0)
 		ret = -EINVAL;
 
-	netif_addr_unlock(dev);
+	netif_addr_unlock_bh(dev);
 
 	return ret;
 }
@@ -2141,6 +2143,9 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		ib_unregister_event_handler(&priv->event_handler);
 		flush_workqueue(ipoib_workqueue);
 
+		/* mark interface in the middle of destruction */
+		set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags);
+
 		rtnl_lock();
 		dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
 		rtnl_unlock();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 82fbc9442608..d3394b6add24 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -582,13 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
 		return;
 	}
 	priv->local_lid = port_attr.lid;
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
-		netif_addr_unlock(dev);
+		netif_addr_unlock_bh(dev);
 		return;
 	}
-	netif_addr_unlock(dev);
+	netif_addr_unlock_bh(dev);
 
 	spin_lock_irq(&priv->lock);
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 64a35595eab8..a2f9f29c6ab5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -131,6 +131,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 
 	ppriv = netdev_priv(pdev);
 
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
+		return -EPERM;
+
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 	priv = ipoib_intf_alloc(intf_name);
@@ -183,6 +186,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	ppriv = netdev_priv(pdev);
 
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
+		return -EPERM;
+
 	if (!rtnl_trylock())
 		return restart_syscall();
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 646de170ec12..3322ed750172 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1457,7 +1457,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
 {
 	unsigned int sg_offset = 0;
 
-	state->desc = req->indirect_desc;
 	state->fr.next = req->fr_list;
 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
 	state->sg = scat;
@@ -1489,7 +1488,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
 	struct scatterlist *sg;
 	int i;
 
-	state->desc = req->indirect_desc;
 	for_each_sg(scat, sg, count, i) {
 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
 			     ib_sg_dma_len(dev->dev, sg),
@@ -1655,6 +1653,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 				   target->indirect_size, DMA_TO_DEVICE);
 
 	memset(&state, 0, sizeof(state));
+	state.desc = req->indirect_desc;
 	if (dev->use_fast_reg)
 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
 	else if (dev->use_fmr)
@@ -3526,7 +3525,7 @@ static void srp_add_one(struct ib_device *device)
 	int mr_page_shift, p;
 	u64 max_pages_per_mr;
 
-	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
+	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
 	if (!srp_dev)
 		return;
 
@@ -3586,8 +3585,6 @@ static void srp_add_one(struct ib_device *device)
 						   IB_ACCESS_REMOTE_WRITE);
 		if (IS_ERR(srp_dev->global_mr))
 			goto err_pd;
-	} else {
-		srp_dev->global_mr = NULL;
 	}
 
 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e68b20cba70b..4a4155640d51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1638,8 +1638,7 @@ retry:
 	 */
 	qp_init->cap.max_send_wr = srp_sq_size / 2;
 	qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
-	qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
-					sdev->device->attrs.max_sge);
+	qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
 	qp_init->port_num = ch->sport->port;
 
 	ch->qp = ib_create_qp(sdev->pd, qp_init);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index fee6bfd7ca21..389030487da7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -106,6 +106,7 @@ enum {
 	SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
 
 	SRPT_DEF_SG_TABLESIZE = 128,
+	SRPT_DEF_SG_PER_WQE = 16,
 
 	MIN_SRPT_SQ_SIZE = 16,
 	DEF_SRPT_SQ_SIZE = 4096,
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 804dbcc37d3f..a529a4535457 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1031,17 +1031,17 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
 
 	case XTYPE_XBOXONE:
 		packet->data[0] = 0x09; /* activate rumble */
-		packet->data[1] = 0x08;
+		packet->data[1] = 0x00;
 		packet->data[2] = xpad->odata_serial++;
-		packet->data[3] = 0x08; /* continuous effect */
-		packet->data[4] = 0x00; /* simple rumble mode */
-		packet->data[5] = 0x03; /* L and R actuator only */
-		packet->data[6] = 0x00; /* TODO: LT actuator */
-		packet->data[7] = 0x00; /* TODO: RT actuator */
+		packet->data[3] = 0x09;
+		packet->data[4] = 0x00;
+		packet->data[5] = 0x0F;
+		packet->data[6] = 0x00;
+		packet->data[7] = 0x00;
 		packet->data[8] = strong / 512;	/* left actuator */
 		packet->data[9] = weak / 512;	/* right actuator */
-		packet->data[10] = 0x80;	/* length of pulse */
-		packet->data[11] = 0x00;	/* stop period of pulse */
+		packet->data[10] = 0xFF;
+		packet->data[11] = 0x00;
 		packet->data[12] = 0x00;
 		packet->len = 13;
 		packet->pending = true;
@@ -1431,22 +1431,15 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 	int ep_irq_in_idx;
 	int i, error;
 
+	if (intf->cur_altsetting->desc.bNumEndpoints != 2)
+		return -ENODEV;
+
 	for (i = 0; xpad_device[i].idVendor; i++) {
 		if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) &&
 		    (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct))
 			break;
 	}
 
-	if (xpad_device[i].xtype == XTYPE_XBOXONE &&
-	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
-		/*
-		 * The Xbox One controller lists three interfaces all with the
-		 * same interface class, subclass and protocol. Differentiate by
-		 * interface number.
-		 */
-		return -ENODEV;
-	}
-
 	xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL);
 	if (!xpad)
 		return -ENOMEM;
@@ -1478,6 +1471,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 		if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
 			if (intf->cur_altsetting->desc.bInterfaceProtocol == 129)
 				xpad->xtype = XTYPE_XBOX360W;
+			else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208)
+				xpad->xtype = XTYPE_XBOXONE;
 			else
 				xpad->xtype = XTYPE_XBOX360;
 		} else {
@@ -1492,6 +1487,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 			xpad->mapping |= MAP_STICKS_TO_NULL;
 	}
 
+	if (xpad->xtype == XTYPE_XBOXONE &&
+	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
+		/*
+		 * The Xbox One controller lists three interfaces all with the
+		 * same interface class, subclass and protocol. Differentiate by
+		 * interface number.
+		 */
+		error = -ENODEV;
+		goto err_free_in_urb;
+	}
+
 	error = xpad_init_output(intf, xpad);
 	if (error)
 		goto err_free_in_urb;
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 78f93cf68840..be5b399da5d3 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd)
 		case 5:
 			etd->hw_version = 3;
 			break;
-		case 6:
-		case 7:
-		case 8:
-		case 9:
-		case 10:
-		case 13:
-		case 14:
+		case 6 ... 14:
 			etd->hw_version = 4;
 			break;
 		default:
diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
index a3f0f5a47490..0f586780ceb4 100644
--- a/drivers/input/mouse/vmmouse.c
+++ b/drivers/input/mouse/vmmouse.c
@@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		return -ENXIO;
 	}
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	/* Check if the device is present */
 	response = ~VMMOUSE_PROTO_MAGIC;
 	VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2);
-	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) {
-		release_region(VMMOUSE_PROTO_PORT, 4);
+	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU)
 		return -ENXIO;
-	}
 
 	if (set_properties) {
 		psmouse->vendor = VMMOUSE_VENDOR;
@@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		psmouse->model = version;
 	}
 
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return 0;
 }
 
@@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse)
 	psmouse_reset(psmouse);
 	input_unregister_device(priv->abs_dev);
 	kfree(priv);
-	release_region(VMMOUSE_PROTO_PORT, 4);
 }
 
 /**
@@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse)
 	struct input_dev *rel_dev = psmouse->dev, *abs_dev;
 	int error;
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	psmouse_reset(psmouse);
 	error = vmmouse_enable(psmouse);
 	if (error)
-		goto release_region;
+		return error;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	abs_dev = input_allocate_device();
@@ -502,8 +487,5 @@ init_fail:
 	kfree(priv);
 	psmouse->private = NULL;
 
-release_region:
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return error;
 }
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index b368b0515c5a..253df96be427 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -157,11 +157,11 @@ static int rmi_function_match(struct device *dev, struct device_driver *drv)
 static void rmi_function_of_probe(struct rmi_function *fn)
 {
 	char of_name[9];
+	struct device_node *node = fn->rmi_dev->xport->dev->of_node;
 
 	snprintf(of_name, sizeof(of_name), "rmi4-f%02x",
 		fn->fd.function_number);
-	fn->dev.of_node = of_find_node_by_name(
-				fn->rmi_dev->xport->dev->of_node, of_name);
+	fn->dev.of_node = of_get_child_by_name(node, of_name);
 }
 #else
 static inline void rmi_function_of_probe(struct rmi_function *fn)
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index 8dd3fb5e1f94..88e91559c84e 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -66,7 +66,7 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
 	struct rmi_device *rmi_dev = fn->rmi_dev;
 	int ret;
 	int offset;
-	u8 buf[14];
+	u8 buf[15];
 	int pitch_x = 0;
 	int pitch_y = 0;
 	int clip_x_low = 0;
@@ -86,9 +86,10 @@ static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
 
 	offset = rmi_register_desc_calc_reg_offset(&f12->control_reg_desc, 8);
 
-	if (item->reg_size > 14) {
-		dev_err(&fn->dev, "F12 control8 should be 14 bytes, not: %ld\n",
-			item->reg_size);
+	if (item->reg_size > sizeof(buf)) {
+		dev_err(&fn->dev,
+			"F12 control8 should be no bigger than %zd bytes, not: %ld\n",
+			sizeof(buf), item->reg_size);
 		return -ENODEV;
 	}
 
diff --git a/drivers/input/touchscreen/ts4800-ts.c b/drivers/input/touchscreen/ts4800-ts.c
index 3c3dd78303be..fed73eeb47b3 100644
--- a/drivers/input/touchscreen/ts4800-ts.c
+++ b/drivers/input/touchscreen/ts4800-ts.c
@@ -118,6 +118,13 @@ static int ts4800_parse_dt(struct platform_device *pdev,
 		return -ENODEV;
 	}
 
+	ts->regmap = syscon_node_to_regmap(syscon_np);
+	of_node_put(syscon_np);
+	if (IS_ERR(ts->regmap)) {
+		dev_err(dev, "cannot get parent's regmap\n");
+		return PTR_ERR(ts->regmap);
+	}
+
 	error = of_property_read_u32_index(np, "syscon", 1, &reg);
 	if (error < 0) {
 		dev_err(dev, "no offset in syscon\n");
@@ -134,12 +141,6 @@ static int ts4800_parse_dt(struct platform_device *pdev,
 
 	ts->bit = BIT(bit);
 
-	ts->regmap = syscon_node_to_regmap(syscon_np);
-	if (IS_ERR(ts->regmap)) {
-		dev_err(dev, "cannot get parent's regmap\n");
-		return PTR_ERR(ts->regmap);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
index 7295c198aa08..6fe55d598fac 100644
--- a/drivers/input/touchscreen/tsc2004.c
+++ b/drivers/input/touchscreen/tsc2004.c
@@ -22,6 +22,11 @@
 #include <linux/regmap.h>
 #include "tsc200x-core.h"
 
+static const struct input_id tsc2004_input_id = {
+	.bustype = BUS_I2C,
+	.product = 2004,
+};
+
 static int tsc2004_cmd(struct device *dev, u8 cmd)
 {
 	u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
@@ -42,7 +47,7 @@ static int tsc2004_probe(struct i2c_client *i2c,
 			 const struct i2c_device_id *id)
 
 {
-	return tsc200x_probe(&i2c->dev, i2c->irq, BUS_I2C,
+	return tsc200x_probe(&i2c->dev, i2c->irq, &tsc2004_input_id,
 			     devm_regmap_init_i2c(i2c, &tsc200x_regmap_config),
 			     tsc2004_cmd);
 }
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index b9f593dfd2ef..f2c5f0e47f77 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -24,6 +24,11 @@
 #include <linux/regmap.h>
 #include "tsc200x-core.h"
 
+static const struct input_id tsc2005_input_id = {
+	.bustype = BUS_SPI,
+	.product = 2005,
+};
+
 static int tsc2005_cmd(struct device *dev, u8 cmd)
 {
 	u8 tx = TSC200X_CMD | TSC200X_CMD_12BIT | cmd;
@@ -62,7 +67,7 @@ static int tsc2005_probe(struct spi_device *spi)
 	if (error)
 		return error;
 
-	return tsc200x_probe(&spi->dev, spi->irq, BUS_SPI,
+	return tsc200x_probe(&spi->dev, spi->irq, &tsc2005_input_id,
 			     devm_regmap_init_spi(spi, &tsc200x_regmap_config),
 			     tsc2005_cmd);
 }
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index 15240c1ee850..dfa7f1c4f545 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -450,7 +450,7 @@ static void tsc200x_close(struct input_dev *input)
 	mutex_unlock(&ts->mutex);
 }
 
-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
+int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		  struct regmap *regmap,
 		  int (*tsc200x_cmd)(struct device *dev, u8 cmd))
 {
@@ -547,9 +547,18 @@ int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
 	snprintf(ts->phys, sizeof(ts->phys),
 		 "%s/input-ts", dev_name(dev));
 
-	input_dev->name = "TSC200X touchscreen";
+	if (tsc_id->product == 2004) {
+		input_dev->name = "TSC200X touchscreen";
+	} else {
+		input_dev->name = devm_kasprintf(dev, GFP_KERNEL,
+						 "TSC%04d touchscreen",
+						 tsc_id->product);
+		if (!input_dev->name)
+			return -ENOMEM;
+	}
+
 	input_dev->phys = ts->phys;
-	input_dev->id.bustype = bustype;
+	input_dev->id = *tsc_id;
 	input_dev->dev.parent = dev;
 	input_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
 	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
index 7a482d102614..49a63a3c6840 100644
--- a/drivers/input/touchscreen/tsc200x-core.h
+++ b/drivers/input/touchscreen/tsc200x-core.h
@@ -70,7 +70,7 @@
 extern const struct regmap_config tsc200x_regmap_config;
 extern const struct dev_pm_ops tsc200x_pm_ops;
 
-int tsc200x_probe(struct device *dev, int irq, __u16 bustype,
+int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		  struct regmap *regmap,
 		  int (*tsc200x_cmd)(struct device *dev, u8 cmd));
 int tsc200x_remove(struct device *dev);
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index bab3c6acf6a2..b6fc4bde79de 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar <jayakumar.lkml@gmail.com>");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-#define W8001_MAX_LENGTH	11
+#define W8001_MAX_LENGTH	13
 #define W8001_LEAD_MASK		0x80
 #define W8001_LEAD_BYTE		0x80
 #define W8001_TAB_MASK		0x40
@@ -155,6 +155,7 @@ static void parse_multi_touch(struct w8001 *w8001)
 		bool touch = data[0] & (1 << i);
 
 		input_mt_slot(dev, i);
+		input_mt_report_slot_state(dev, MT_TOOL_FINGER, touch);
 		if (touch) {
 			x = (data[6 * i + 1] << 7) | data[6 * i + 2];
 			y = (data[6 * i + 3] << 7) | data[6 * i + 4];
@@ -339,6 +340,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio,
 		w8001->idx = 0;
 		parse_multi_touch(w8001);
 		break;
+
+	default:
+		/*
+		 * ThinkPad X60 Tablet PC (pen only device) sometimes
+		 * sends invalid data packets that are larger than
+		 * W8001_PKTLEN_TPCPEN. Let's start over again.
+		 */
+		if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1)
+			w8001->idx = 0;
 	}
 
 	return IRQ_HANDLED;
@@ -513,6 +523,8 @@ static int w8001_setup_touch(struct w8001 *w8001, char *basename,
 					0, touch.x, 0, 0);
 		input_set_abs_params(dev, ABS_MT_POSITION_Y,
 					0, touch.y, 0, 0);
+		input_set_abs_params(dev, ABS_MT_TOOL_TYPE,
+					0, MT_TOOL_MAX, 0, 0);
 
 		strlcat(basename, " 2FG", basename_sz);
 		if (w8001->max_pen_x && w8001->max_pen_y)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9e0034196e10..59741ead7e15 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
 				break;
 			}
 
+			devid = e->devid;
 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
 				    hid, uid,
 				    PCI_BUS_NUM(devid),
 				    PCI_SLOT(devid),
 				    PCI_FUNC(devid));
 
-			devid  = e->devid;
 			flags = e->flags;
 
 			ret = add_acpi_hid_device(hid, uid, &devid, false);
@@ -1568,13 +1568,23 @@ static int __init amd_iommu_init_pci(void)
 			break;
 	}
 
+	/*
+	 * Order is important here to make sure any unity map requirements are
+	 * fulfilled. The unity mappings are created and written to the device
+	 * table during the amd_iommu_init_api() call.
+	 *
+	 * After that we call init_device_table_dma() to make sure any
+	 * uninitialized DTE will block DMA, and in the end we flush the caches
+	 * of all IOMMUs to make sure the changes to the device table are
+	 * active.
+	 */
+	ret = amd_iommu_init_api();
+
 	init_device_table_dma();
 
 	for_each_iommu(iommu)
 		iommu_flush_all_caches(iommu);
 
-	ret = amd_iommu_init_api();
-
 	if (!ret)
 		print_iommu_info();
 
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 94b68213c50d..5f6b3bcab078 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1941,6 +1941,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
+	.map_sg			= default_iommu_map_sg,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a644d0cec2d8..323dac9900ba 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3222,11 +3222,6 @@ static int __init init_dmars(void)
 			}
 		}
 
-		iommu_flush_write_buffer(iommu);
-		iommu_set_root_entry(iommu);
-		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
 		if (!ecap_pass_through(iommu->ecap))
 			hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
@@ -3235,6 +3230,18 @@ static int __init init_dmars(void)
 #endif
 	}
 
+	/*
+	 * Now that qi is enabled on all iommus, set the root entry and flush
+	 * caches. This is required on some Intel X58 chipsets, otherwise the
+	 * flush_context function will loop forever and the boot hangs.
+	 */
+	for_each_active_iommu(iommu, drhd) {
+		iommu_flush_write_buffer(iommu);
+		iommu_set_root_entry(iommu);
+		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+	}
+
 	if (iommu_pass_through)
 		iommu_identity_mapping |= IDENTMAP_ALL;
 
@@ -4595,13 +4602,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 	for (i = 0; i < g_num_of_iommus; i++) {
 		struct intel_iommu *iommu = g_iommus[i];
 		struct dmar_domain *domain;
-		u16 did;
+		int did;
 
 		if (!iommu)
 			continue;
 
-		for (did = 0; did < 0xffff; did++) {
-			domain = get_iommu_domain(iommu, did);
+		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
+			domain = get_iommu_domain(iommu, (u16)did);
 
 			if (!domain)
 				continue;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index ba764a0835d3..e23001bfcfee 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -420,8 +420,10 @@ retry:
 
 		/* Try replenishing IOVAs by flushing rcache. */
 		flushed_rcache = true;
+		preempt_disable();
 		for_each_online_cpu(cpu)
 			free_cpu_cached_iovas(cpu, iovad);
+		preempt_enable();
 		goto retry;
 	}
 
@@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	if (mag_to_free) {
 		iova_magazine_free_pfns(mag_to_free, iovad);
@@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 	bool has_pfn = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	return iova_pfn;
 }
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index c7d6156ff536..25b4627cb57f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -815,7 +815,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	dte_addr = virt_to_phys(rk_domain->dt);
 	for (i = 0; i < iommu->num_mmu; i++) {
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
-		rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
 	}
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 3b5e10aa48ab..70ed1d0151b8 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -718,7 +718,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
 
 	spin_lock_irqsave(&gic_lock, flags);
 	gic_map_to_pin(intr, gic_cpu_pin);
-	gic_map_to_vpe(intr, vpe);
+	gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
 	for (i = 0; i < min(gic_vpes, NR_CPUS); i++)
 		clear_bit(intr, pcpu_masks[i].pcpu_mask);
 	set_bit(intr, pcpu_masks[vpe].pcpu_mask);
@@ -746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
 		/* verify that it doesn't conflict with an IPI irq */
 		if (test_bit(spec->hwirq, ipi_resrv))
 			return -EBUSY;
+
+		hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
+
+		return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						     &gic_level_irq_controller,
+						     NULL);
 	} else {
 		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
 		if (base_hwirq == gic_shared_intrs) {
@@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
 						    &gic_level_irq_controller,
 						    NULL);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
 	return 0;
+
+error:
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+	return ret;
 }
 
 void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
@@ -949,7 +959,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node,
 	switch (bus_token) {
 	case DOMAIN_BUS_IPI:
 		is_ipi = d->bus_token == bus_token;
-		return to_of_node(d->fwnode) == node && is_ipi;
+		return (!node || to_of_node(d->fwnode) == node) && is_ipi;
 		break;
 	default:
 		return 0;
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 3495d5d6547f..3bce44893021 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -53,11 +53,12 @@ static void led_timer_function(unsigned long data)
 
 	if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) {
 		led_set_brightness_nosleep(led_cdev, LED_OFF);
+		led_cdev->flags &= ~LED_BLINK_SW;
 		return;
 	}
 
 	if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) {
-		led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
+		led_cdev->flags &=  ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW);
 		return;
 	}
 
@@ -151,6 +152,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
+	led_cdev->flags |= LED_BLINK_SW;
 	mod_timer(&led_cdev->blink_timer, jiffies + 1);
 }
 
@@ -219,6 +221,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 	del_timer_sync(&led_cdev->blink_timer);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
+	led_cdev->flags &= ~LED_BLINK_SW;
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
@@ -226,10 +229,10 @@ void led_set_brightness(struct led_classdev *led_cdev,
 			enum led_brightness brightness)
 {
 	/*
-	 * In case blinking is on delay brightness setting
+	 * If software blink is active, delay brightness setting
 	 * until the next timer tick.
 	 */
-	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) {
+	if (led_cdev->flags & LED_BLINK_SW) {
 		/*
 		 * If we need to disable soft blinking delegate this to the
 		 * work queue task to avoid problems in case we are called
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index 410c39c62dc7..c9f386213e9e 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
+#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -154,6 +155,30 @@ static struct led_trigger heartbeat_led_trigger = {
 	.deactivate = heartbeat_trig_deactivate,
 };
 
+static int heartbeat_pm_notifier(struct notifier_block *nb,
+				 unsigned long pm_event, void *unused)
+{
+	int rc;
+
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+		led_trigger_unregister(&heartbeat_led_trigger);
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+		rc = led_trigger_register(&heartbeat_led_trigger);
+		if (rc)
+			pr_err("could not re-register heartbeat trigger\n");
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
 				     unsigned long code, void *unused)
 {
@@ -168,6 +193,10 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static struct notifier_block heartbeat_pm_nb = {
+	.notifier_call = heartbeat_pm_notifier,
+};
+
 static struct notifier_block heartbeat_reboot_nb = {
 	.notifier_call = heartbeat_reboot_notifier,
 };
@@ -184,12 +213,14 @@ static int __init heartbeat_trig_init(void)
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &heartbeat_panic_nb);
 		register_reboot_notifier(&heartbeat_reboot_nb);
+		register_pm_notifier(&heartbeat_pm_nb);
 	}
 	return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
+	unregister_pm_notifier(&heartbeat_pm_nb);
 	unregister_reboot_notifier(&heartbeat_reboot_nb);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &heartbeat_panic_nb);
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index b73c6e7d28e4..6f2c8522e14a 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -61,21 +61,36 @@ static int mcb_probe(struct device *dev)
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
 	const struct mcb_device_id *found_id;
+	struct module *carrier_mod;
+	int ret;
 
 	found_id = mcb_match_id(mdrv->id_table, mdev);
 	if (!found_id)
 		return -ENODEV;
 
-	return mdrv->probe(mdev, found_id);
+	carrier_mod = mdev->dev.parent->driver->owner;
+	if (!try_module_get(carrier_mod))
+		return -EINVAL;
+
+	get_device(dev);
+	ret = mdrv->probe(mdev, found_id);
+	if (ret)
+		module_put(carrier_mod);
+
+	return ret;
 }
 
 static int mcb_remove(struct device *dev)
 {
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
+	struct module *carrier_mod;
 
 	mdrv->remove(mdev);
 
+	carrier_mod = mdev->dev.parent->driver->owner;
+	module_put(carrier_mod);
+
 	put_device(&mdev->dev);
 
 	return 0;
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index beb2841ceae5..3f1ab4986cfc 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -779,11 +779,31 @@ static const struct v4l2_dv_timings_cap adv76xx_timings_cap_digital = {
 			V4L2_DV_BT_CAP_CUSTOM)
 };
 
-static inline const struct v4l2_dv_timings_cap *
-adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd)
+/*
+ * Return the DV timings capabilities for the requested sink pad. As a special
+ * case, pad value -1 returns the capabilities for the currently selected input.
+ */
+static const struct v4l2_dv_timings_cap *
+adv76xx_get_dv_timings_cap(struct v4l2_subdev *sd, int pad)
 {
-	return is_digital_input(sd) ? &adv76xx_timings_cap_digital :
-				      &adv7604_timings_cap_analog;
+	if (pad == -1) {
+		struct adv76xx_state *state = to_state(sd);
+
+		pad = state->selected_input;
+	}
+
+	switch (pad) {
+	case ADV76XX_PAD_HDMI_PORT_A:
+	case ADV7604_PAD_HDMI_PORT_B:
+	case ADV7604_PAD_HDMI_PORT_C:
+	case ADV7604_PAD_HDMI_PORT_D:
+		return &adv76xx_timings_cap_digital;
+
+	case ADV7604_PAD_VGA_RGB:
+	case ADV7604_PAD_VGA_COMP:
+	default:
+		return &adv7604_timings_cap_analog;
+	}
 }
 
 
@@ -1329,7 +1349,7 @@ static int stdi2dv_timings(struct v4l2_subdev *sd,
 		const struct v4l2_bt_timings *bt = &v4l2_dv_timings_presets[i].bt;
 
 		if (!v4l2_valid_dv_timings(&v4l2_dv_timings_presets[i],
-					   adv76xx_get_dv_timings_cap(sd),
+					   adv76xx_get_dv_timings_cap(sd, -1),
 					   adv76xx_check_dv_timings, NULL))
 			continue;
 		if (vtotal(bt) != stdi->lcf + 1)
@@ -1430,18 +1450,22 @@ static int adv76xx_enum_dv_timings(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	return v4l2_enum_dv_timings_cap(timings,
-		adv76xx_get_dv_timings_cap(sd), adv76xx_check_dv_timings, NULL);
+		adv76xx_get_dv_timings_cap(sd, timings->pad),
+		adv76xx_check_dv_timings, NULL);
 }
 
 static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
 			struct v4l2_dv_timings_cap *cap)
 {
 	struct adv76xx_state *state = to_state(sd);
+	unsigned int pad = cap->pad;
 
 	if (cap->pad >= state->source_pad)
 		return -EINVAL;
 
-	*cap = *adv76xx_get_dv_timings_cap(sd);
+	*cap = *adv76xx_get_dv_timings_cap(sd, pad);
+	cap->pad = pad;
+
 	return 0;
 }
 
@@ -1450,9 +1474,9 @@ static int adv76xx_dv_timings_cap(struct v4l2_subdev *sd,
 static void adv76xx_fill_optional_dv_timings_fields(struct v4l2_subdev *sd,
 		struct v4l2_dv_timings *timings)
 {
-	v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd),
-			is_digital_input(sd) ? 250000 : 1000000,
-			adv76xx_check_dv_timings, NULL);
+	v4l2_find_dv_timings_cap(timings, adv76xx_get_dv_timings_cap(sd, -1),
+				 is_digital_input(sd) ? 250000 : 1000000,
+				 adv76xx_check_dv_timings, NULL);
 }
 
 static unsigned int adv7604_read_hdmi_pixelclock(struct v4l2_subdev *sd)
@@ -1620,7 +1644,7 @@ static int adv76xx_s_dv_timings(struct v4l2_subdev *sd,
 
 	bt = &timings->bt;
 
-	if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd),
+	if (!v4l2_valid_dv_timings(timings, adv76xx_get_dv_timings_cap(sd, -1),
 				   adv76xx_check_dv_timings, NULL))
 		return -ERANGE;
 
diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
index 87c12930416f..92d9d4214c3a 100644
--- a/drivers/media/usb/airspy/airspy.c
+++ b/drivers/media/usb/airspy/airspy.c
@@ -1072,7 +1072,7 @@ static int airspy_probe(struct usb_interface *intf,
 	if (ret) {
 		dev_err(s->dev, "Failed to register as video device (%d)\n",
 				ret);
-		goto err_unregister_v4l2_dev;
+		goto err_free_controls;
 	}
 	dev_info(s->dev, "Registered as %s\n",
 			video_device_node_name(&s->vdev));
@@ -1081,7 +1081,6 @@ static int airspy_probe(struct usb_interface *intf,
 
 err_free_controls:
 	v4l2_ctrl_handler_free(&s->hdl);
-err_unregister_v4l2_dev:
 	v4l2_device_unregister(&s->v4l2_dev);
 err_free_mem:
 	kfree(s);
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index d7723ce772b3..c04bc6afb965 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -1274,8 +1274,6 @@ struct uvc_xu_control_mapping32 {
 static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 			const struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1292,17 +1290,7 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 
 	if (__get_user(p, &up->menu_info))
 		return -EFAULT;
-	umenus = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
-	kmenus = compat_alloc_user_space(kp->menu_count * sizeof(*kmenus));
-	if (kmenus == NULL)
-		return -EFAULT;
-	kp->menu_info = kmenus;
-
-	if (copy_in_user(kmenus, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
+	kp->menu_info = compat_ptr(p);
 
 	return 0;
 }
@@ -1310,10 +1298,6 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 			struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus = kp->menu_info;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), menu_info)) ||
 	    __put_user(kp->menu_count, &up->menu_count))
@@ -1322,16 +1306,6 @@ static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 	if (__clear_user(up->reserved, sizeof(up->reserved)))
 		return -EFAULT;
 
-	if (kp->menu_count == 0)
-		return 0;
-
-	if (get_user(p, &up->menu_info))
-		return -EFAULT;
-	umenus = compat_ptr(p);
-
-	if (copy_in_user(umenus, kmenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1346,8 +1320,6 @@ struct uvc_xu_control_query32 {
 static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 			const struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1361,17 +1333,7 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 
 	if (__get_user(p, &up->data))
 		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	kdata = compat_alloc_user_space(kp->size);
-	if (kdata == NULL)
-		return -EFAULT;
-	kp->data = kdata;
-
-	if (copy_in_user(kdata, udata, kp->size))
-		return -EFAULT;
+	kp->data = compat_ptr(p);
 
 	return 0;
 }
@@ -1379,26 +1341,10 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 			struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata = kp->data;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), data)))
 		return -EFAULT;
 
-	if (kp->size == 0)
-		return 0;
-
-	if (get_user(p, &up->data))
-		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	if (copy_in_user(udata, kdata, kp->size))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1408,47 +1354,44 @@ static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 static long uvc_v4l2_compat_ioctl32(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
+	struct uvc_fh *handle = file->private_data;
 	union {
 		struct uvc_xu_control_mapping xmap;
 		struct uvc_xu_control_query xqry;
 	} karg;
 	void __user *up = compat_ptr(arg);
-	mm_segment_t old_fs;
 	long ret;
 
 	switch (cmd) {
 	case UVCIOC_CTRL_MAP32:
-		cmd = UVCIOC_CTRL_MAP;
 		ret = uvc_v4l2_get_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+		ret = uvc_ioctl_ctrl_map(handle->chain, &karg.xmap);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+
 		break;
 
 	case UVCIOC_CTRL_QUERY32:
-		cmd = UVCIOC_CTRL_QUERY;
 		ret = uvc_v4l2_get_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
+		ret = uvc_xu_ctrl_query(handle->chain, &karg.xqry);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
 		break;
 
 	default:
 		return -ENOIOCTLCMD;
 	}
 
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = video_ioctl2(file, cmd, (unsigned long)&karg);
-	set_fs(old_fs);
-
-	if (ret < 0)
-		return ret;
-
-	switch (cmd) {
-	case UVCIOC_CTRL_MAP:
-		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
-		break;
-
-	case UVCIOC_CTRL_QUERY:
-		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
-		break;
-	}
-
 	return ret;
 }
 #endif
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 28e5be2c2eef..528390f33b53 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2171,7 +2171,7 @@ static int v4l_cropcap(const struct v4l2_ioctl_ops *ops,
 	 * The determine_valid_ioctls() call already should ensure
 	 * that this can never happen, but just in case...
 	 */
-	if (WARN_ON(!ops->vidioc_cropcap && !ops->vidioc_cropcap))
+	if (WARN_ON(!ops->vidioc_cropcap && !ops->vidioc_g_selection))
 		return -ENOTTY;
 
 	if (ops->vidioc_cropcap)
diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index ca94bded3386..8bef4331bd51 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c
@@ -1,7 +1,7 @@
 /*
  * Media Controller ancillary functions
  *
- * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+ * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@kernel.org>
  * Copyright (C) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (C) 2006-2010 Nokia Corporation
  * Copyright (c) 2016 Intel Corporation.
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index af4884ba6b7c..15508df24e5d 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -398,7 +398,7 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p)
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
 			   GPMC_CONFIG4_OEEXTRADELAY, p->oe_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
-			   GPMC_CONFIG4_OEEXTRADELAY, p->we_extra_delay);
+			   GPMC_CONFIG4_WEEXTRADELAY, p->we_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG6,
 			   GPMC_CONFIG6_CYCLE2CYCLESAMECSEN,
 			   p->cycle2cyclesamecsen);
diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
index 199d261990be..f32fbb8e8129 100644
--- a/drivers/mfd/max77620.c
+++ b/drivers/mfd/max77620.c
@@ -203,6 +203,7 @@ static int max77620_get_fps_period_reg_value(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_min_period = MAX77620_FPS_PERIOD_MIN_US;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -236,6 +237,7 @@ static int max77620_config_fps(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_max_period = MAX77620_FPS_PERIOD_MAX_US;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index eed254da63a8..641c1a566687 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -730,7 +730,7 @@ static void mei_cl_wake_all(struct mei_cl *cl)
 	/* synchronized under device mutex */
 	if (waitqueue_active(&cl->wait)) {
 		cl_dbg(dev, cl, "Waking up ctrl write clients!\n");
-		wake_up_interruptible(&cl->wait);
+		wake_up(&cl->wait);
 	}
 }
 
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index e62fde3ac431..c5472e3c9231 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -355,8 +355,10 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
 		goto idata_err;
 	}
 
-	if (!idata->buf_bytes)
+	if (!idata->buf_bytes) {
+		idata->buf = NULL;
 		return idata;
+	}
 
 	idata->buf = kmalloc(idata->buf_bytes, GFP_KERNEL);
 	if (!idata->buf) {
@@ -1786,8 +1788,8 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
 
 	packed_cmd_hdr = packed->cmd_hdr;
 	memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr));
-	packed_cmd_hdr[0] = (packed->nr_entries << 16) |
-		(PACKED_CMD_WR << 8) | PACKED_CMD_VER;
+	packed_cmd_hdr[0] = cpu_to_le32((packed->nr_entries << 16) |
+		(PACKED_CMD_WR << 8) | PACKED_CMD_VER);
 	hdr_blocks = mmc_large_sector(card) ? 8 : 1;
 
 	/*
@@ -1801,14 +1803,14 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
 			((brq->data.blocks * brq->data.blksz) >=
 			 card->ext_csd.data_tag_unit_size);
 		/* Argument of CMD23 */
-		packed_cmd_hdr[(i * 2)] =
+		packed_cmd_hdr[(i * 2)] = cpu_to_le32(
 			(do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) |
 			(do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) |
-			blk_rq_sectors(prq);
+			blk_rq_sectors(prq));
 		/* Argument of CMD18 or CMD25 */
-		packed_cmd_hdr[((i * 2)) + 1] =
+		packed_cmd_hdr[((i * 2)) + 1] = cpu_to_le32(
 			mmc_card_blockaddr(card) ?
-			blk_rq_pos(prq) : blk_rq_pos(prq) << 9;
+			blk_rq_pos(prq) : blk_rq_pos(prq) << 9);
 		packed->blocks += blk_rq_sectors(prq);
 		i++;
 	}
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 86fac3e86833..c763b404510f 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -789,14 +789,16 @@ static int pxamci_probe(struct platform_device *pdev)
 		gpio_direction_output(gpio_power,
 				      host->pdata->gpio_power_invert);
 	}
-	if (gpio_is_valid(gpio_ro))
+	if (gpio_is_valid(gpio_ro)) {
 		ret = mmc_gpio_request_ro(mmc, gpio_ro);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro);
-		goto out;
-	} else {
-		mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
-			0 : MMC_CAP2_RO_ACTIVE_HIGH;
+		if (ret) {
+			dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n",
+				gpio_ro);
+			goto out;
+		} else {
+			mmc->caps2 |= host->pdata->gpio_card_ro_invert ?
+				0 : MMC_CAP2_RO_ACTIVE_HIGH;
+		}
 	}
 
 	if (gpio_is_valid(gpio_cd))
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 08e158895635..a136da8df6fe 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1657,8 +1657,11 @@ static int omap_get_dt_info(struct device *dev, struct omap_nand_info *info)
 
 	/* detect availability of ELM module. Won't be present pre-OMAP4 */
 	info->elm_of_node = of_parse_phandle(child, "ti,elm-id", 0);
-	if (!info->elm_of_node)
-		dev_dbg(dev, "ti,elm-id not in DT\n");
+	if (!info->elm_of_node) {
+		info->elm_of_node = of_parse_phandle(child, "elm_id", 0);
+		if (!info->elm_of_node)
+			dev_dbg(dev, "ti,elm-id not in DT\n");
+	}
 
 	/* select ecc-scheme for NAND */
 	if (of_property_read_string(child, "ti,nand-ecc-opt", &s)) {
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 16baeb51b2bd..ef3618299494 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1147,11 +1147,17 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
  */
 static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 {
-	struct kstat stat;
 	int err, minor;
+	struct path path;
+	struct kstat stat;
 
 	/* Probably this is an MTD character device node path */
-	err = vfs_stat(mtd_dev, &stat);
+	err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path);
+	if (err)
+		return ERR_PTR(err);
+
+	err = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (err)
 		return ERR_PTR(err);
 
@@ -1160,6 +1166,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 		return ERR_PTR(-EINVAL);
 
 	minor = MINOR(stat.rdev);
+
 	if (minor & 1)
 		/*
 		 * Just do not think the "/dev/mtdrX" devices support is need,
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 5780dd1ba79d..ebf517271d29 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
 	struct ubi_volume *vol = ubi->volumes[idx];
 	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 	if (!vid_hdr)
@@ -599,14 +600,8 @@ retry:
 		goto out_put;
 	}
 
-	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
-	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
-	if (err) {
-		up_read(&ubi->fm_eba_sem);
-		goto write_error;
-	}
+	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
 
-	data_size = offset + len;
 	mutex_lock(&ubi->buf_mutex);
 	memset(ubi->peb_buf + offset, 0xFF, len);
 
@@ -621,6 +616,19 @@ retry:
 
 	memcpy(ubi->peb_buf + offset, buf, len);
 
+	data_size = offset + len;
+	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
+	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+	vid_hdr->copy_flag = 1;
+	vid_hdr->data_size = cpu_to_be32(data_size);
+	vid_hdr->data_crc = cpu_to_be32(crc);
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (err) {
+		mutex_unlock(&ubi->buf_mutex);
+		up_read(&ubi->fm_eba_sem);
+		goto write_error;
+	}
+
 	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
 	if (err) {
 		mutex_unlock(&ubi->buf_mutex);
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 348dbbcbedc8..a9e2cef7c95c 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -302,6 +302,7 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
 struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 {
 	int error, ubi_num, vol_id;
+	struct path path;
 	struct kstat stat;
 
 	dbg_gen("open volume %s, mode %d", pathname, mode);
@@ -309,7 +310,12 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 	if (!pathname || !*pathname)
 		return ERR_PTR(-EINVAL);
 
-	error = vfs_stat(pathname, &stat);
+	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	if (error)
+		return ERR_PTR(error);
+
+	error = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (error)
 		return ERR_PTR(error);
 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index b9304a295f86..edc70ffad660 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -101,11 +101,14 @@ enum ad_link_speed_type {
 #define MAC_ADDRESS_EQUAL(A, B)	\
 	ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)
 
-static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } };
+static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
+	0, 0, 0, 0, 0, 0
+};
 static u16 ad_ticks_per_sec;
 static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
 
-static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
+static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
+	MULTICAST_LACPDU_ADDR;
 
 /* ================= main 802.3ad protocol functions ================== */
 static int ad_lacpdu_send(struct port *port);
@@ -657,6 +660,20 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
 	}
 }
 
+static int __agg_active_ports(struct aggregator *agg)
+{
+	struct port *port;
+	int active = 0;
+
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (port->is_enabled)
+			active++;
+	}
+
+	return active;
+}
+
 /**
  * __get_agg_bandwidth - get the total bandwidth of an aggregator
  * @aggregator: the aggregator we're looking at
@@ -664,39 +681,40 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
  */
 static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 {
+	int nports = __agg_active_ports(aggregator);
 	u32 bandwidth = 0;
 
-	if (aggregator->num_of_ports) {
+	if (nports) {
 		switch (__get_link_speed(aggregator->lag_ports)) {
 		case AD_LINK_SPEED_1MBPS:
-			bandwidth = aggregator->num_of_ports;
+			bandwidth = nports;
 			break;
 		case AD_LINK_SPEED_10MBPS:
-			bandwidth = aggregator->num_of_ports * 10;
+			bandwidth = nports * 10;
 			break;
 		case AD_LINK_SPEED_100MBPS:
-			bandwidth = aggregator->num_of_ports * 100;
+			bandwidth = nports * 100;
 			break;
 		case AD_LINK_SPEED_1000MBPS:
-			bandwidth = aggregator->num_of_ports * 1000;
+			bandwidth = nports * 1000;
 			break;
 		case AD_LINK_SPEED_2500MBPS:
-			bandwidth = aggregator->num_of_ports * 2500;
+			bandwidth = nports * 2500;
 			break;
 		case AD_LINK_SPEED_10000MBPS:
-			bandwidth = aggregator->num_of_ports * 10000;
+			bandwidth = nports * 10000;
 			break;
 		case AD_LINK_SPEED_20000MBPS:
-			bandwidth = aggregator->num_of_ports * 20000;
+			bandwidth = nports * 20000;
 			break;
 		case AD_LINK_SPEED_40000MBPS:
-			bandwidth = aggregator->num_of_ports * 40000;
+			bandwidth = nports * 40000;
 			break;
 		case AD_LINK_SPEED_56000MBPS:
-			bandwidth = aggregator->num_of_ports * 56000;
+			bandwidth = nports * 56000;
 			break;
 		case AD_LINK_SPEED_100000MBPS:
-			bandwidth = aggregator->num_of_ports * 100000;
+			bandwidth = nports * 100000;
 			break;
 		default:
 			bandwidth = 0; /* to silence the compiler */
@@ -1530,10 +1548,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
 
 	switch (__get_agg_selection_mode(curr->lag_ports)) {
 	case BOND_AD_COUNT:
-		if (curr->num_of_ports > best->num_of_ports)
+		if (__agg_active_ports(curr) > __agg_active_ports(best))
 			return curr;
 
-		if (curr->num_of_ports < best->num_of_ports)
+		if (__agg_active_ports(curr) < __agg_active_ports(best))
 			return best;
 
 		/*FALLTHROUGH*/
@@ -1561,8 +1579,14 @@ static int agg_device_up(const struct aggregator *agg)
 	if (!port)
 		return 0;
 
-	return netif_running(port->slave->dev) &&
-	       netif_carrier_ok(port->slave->dev);
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (netif_running(port->slave->dev) &&
+		    netif_carrier_ok(port->slave->dev))
+			return 1;
+	}
+
+	return 0;
 }
 
 /**
@@ -1610,7 +1634,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 
 		agg->is_active = 0;
 
-		if (agg->num_of_ports && agg_device_up(agg))
+		if (__agg_active_ports(agg) && agg_device_up(agg))
 			best = ad_agg_selection_test(best, agg);
 	}
 
@@ -1622,7 +1646,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 		 * answering partner.
 		 */
 		if (active && active->lag_ports &&
-		    active->lag_ports->is_enabled &&
+		    __agg_active_ports(active) &&
 		    (__agg_has_partner(active) ||
 		     (!__agg_has_partner(active) &&
 		     !__agg_has_partner(best)))) {
@@ -1718,7 +1742,7 @@ static void ad_clear_agg(struct aggregator *aggregator)
 		aggregator->is_individual = false;
 		aggregator->actor_admin_aggregator_key = 0;
 		aggregator->actor_oper_aggregator_key = 0;
-		aggregator->partner_system = null_mac_addr;
+		eth_zero_addr(aggregator->partner_system.mac_addr_value);
 		aggregator->partner_system_priority = 0;
 		aggregator->partner_oper_aggregator_key = 0;
 		aggregator->receive_state = 0;
@@ -1740,7 +1764,7 @@ static void ad_initialize_agg(struct aggregator *aggregator)
 	if (aggregator) {
 		ad_clear_agg(aggregator);
 
-		aggregator->aggregator_mac_address = null_mac_addr;
+		eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value);
 		aggregator->aggregator_identifier = 0;
 		aggregator->slave = NULL;
 	}
@@ -2133,7 +2157,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 				else
 					temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
 				temp_aggregator->num_of_ports--;
-				if (temp_aggregator->num_of_ports == 0) {
+				if (__agg_active_ports(temp_aggregator) == 0) {
 					select_new_active_agg = temp_aggregator->is_active;
 					ad_clear_agg(temp_aggregator);
 					if (select_new_active_agg) {
@@ -2432,7 +2456,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave)
  */
 void bond_3ad_handle_link_change(struct slave *slave, char link)
 {
+	struct aggregator *agg;
 	struct port *port;
+	bool dummy;
 
 	port = &(SLAVE_AD_INFO(slave)->port);
 
@@ -2459,6 +2485,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 		port->is_enabled = false;
 		ad_update_actor_keys(port, true);
 	}
+	agg = __get_first_agg(port);
+	ad_agg_selection_logic(agg, &dummy);
+
 	netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n",
 		   port->actor_port_number,
 		   link == BOND_LINK_UP ? "UP" : "DOWN");
@@ -2499,7 +2528,7 @@ int bond_3ad_set_carrier(struct bonding *bond)
 	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
 	if (active) {
 		/* are enough slaves available to consider link up? */
-		if (active->num_of_ports < bond->params.min_links) {
+		if (__agg_active_ports(active) < bond->params.min_links) {
 			if (netif_carrier_ok(bond->dev)) {
 				netif_carrier_off(bond->dev);
 				goto out;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c5ac160a8ae9..551f0f8dead3 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -42,13 +42,10 @@
 
 
 
-#ifndef __long_aligned
-#define __long_aligned __attribute__((aligned((sizeof(long)))))
-#endif
-static const u8 mac_bcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = {
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 };
-static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = {
+static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
 };
 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 941ec99cd3b6..a2afa3be17a4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	/* check for initial state */
+	new_slave->link = BOND_LINK_NOCHANGE;
 	if (bond->params.miimon) {
 		if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
 			if (bond->params.updelay) {
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index db760e84119f..b8df0f5e8c25 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
 	if (err < 0)
 		return err;
 
-	return register_netdevice(bond_dev);
+	err = register_netdevice(bond_dev);
+
+	netif_carrier_off(bond_dev);
+
+	return err;
 }
 
 static size_t bond_get_size(const struct net_device *bond_dev)
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 8b3275d7792a..8f5e93cb7975 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota)
 
 	/* upper group completed, look again in lower */
 	if (priv->rx_next > get_mb_rx_low_last(priv) &&
-	    quota > 0 && mb > get_mb_rx_last(priv)) {
+	    mb > get_mb_rx_last(priv)) {
 		priv->rx_next = get_mb_rx_first(priv);
-		goto again;
+		if (quota > 0)
+			goto again;
 	}
 
 	return received;
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index f91b094288da..e3dccd3200d5 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
 
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
 
-	for (i = 0; i < frame->can_dlc; i += 2) {
-		priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2,
-				frame->data[i] | (frame->data[i + 1] << 8));
+	if (priv->type == BOSCH_D_CAN) {
+		u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface);
+
+		for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+			data = (u32)frame->data[i];
+			data |= (u32)frame->data[i + 1] << 8;
+			data |= (u32)frame->data[i + 2] << 16;
+			data |= (u32)frame->data[i + 3] << 24;
+			priv->write_reg32(priv, dreg, data);
+		}
+	} else {
+		for (i = 0; i < frame->can_dlc; i += 2) {
+			priv->write_reg(priv,
+					C_CAN_IFACE(DATA1_REG, iface) + i / 2,
+					frame->data[i] |
+					(frame->data[i + 1] << 8));
+		}
 	}
 }
 
@@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl)
 	} else {
 		int i, dreg = C_CAN_IFACE(DATA1_REG, iface);
 
-		for (i = 0; i < frame->can_dlc; i += 2, dreg ++) {
-			data = priv->read_reg(priv, dreg);
-			frame->data[i] = data;
-			frame->data[i + 1] = data >> 8;
+		if (priv->type == BOSCH_D_CAN) {
+			for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+				data = priv->read_reg32(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+				frame->data[i + 2] = data >> 16;
+				frame->data[i + 3] = data >> 24;
+			}
+		} else {
+			for (i = 0; i < frame->can_dlc; i += 2, dreg++) {
+				data = priv->read_reg(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+			}
 		}
 	}
 
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 910c12e2638e..ad535a854e5c 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -798,6 +798,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[])
 	 * - control mode with CAN_CTRLMODE_FD set
 	 */
 
+	if (!data)
+		return 0;
+
 	if (data[IFLA_CAN_CTRLMODE]) {
 		struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);
 
@@ -1008,6 +1011,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev,
 	return -EOPNOTSUPP;
 }
 
+static void can_dellink(struct net_device *dev, struct list_head *head)
+{
+	return;
+}
+
 static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.kind		= "can",
 	.maxtype	= IFLA_CAN_MAX,
@@ -1016,6 +1024,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.validate	= can_validate,
 	.newlink	= can_newlink,
 	.changelink	= can_changelink,
+	.dellink	= can_dellink,
 	.get_size	= can_get_size,
 	.fill_info	= can_fill_info,
 	.get_xstats_size = can_get_xstats_size,
diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index bcb272f6c68a..8483a40e7e9e 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -16,7 +16,8 @@ config CAN_ESD_USB2
 config CAN_GS_USB
 	tristate "Geschwister Schneider UG interfaces"
 	---help---
-	  This driver supports the Geschwister Schneider USB/CAN devices.
+	  This driver supports the Geschwister Schneider and bytewerk.org
+	  candleLight USB CAN interfaces USB/CAN devices
 	  If unsure choose N,
 	  choose Y for built in support,
 	  M to compile as module (module will be named: gs_usb).
@@ -46,6 +47,8 @@ config CAN_KVASER_USB
 	    - Kvaser USBcan R
 	    - Kvaser Leaf Light v2
 	    - Kvaser Mini PCI Express HS
+	    - Kvaser Mini PCI Express 2xHS
+	    - Kvaser USBcan Light 2xHS
 	    - Kvaser USBcan II HS/HS
 	    - Kvaser USBcan II HS/LS
 	    - Kvaser USBcan Rugged ("USBcan Rev B")
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 1556d4286235..acb0c8490673 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -1,7 +1,9 @@
-/* CAN driver for Geschwister Schneider USB/CAN devices.
+/* CAN driver for Geschwister Schneider USB/CAN devices
+ * and bytewerk.org candleLight USB CAN interfaces.
  *
- * Copyright (C) 2013 Geschwister Schneider Technologie-,
+ * Copyright (C) 2013-2016 Geschwister Schneider Technologie-,
  * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt).
+ * Copyright (C) 2016 Hubert Denkmair
  *
  * Many thanks to all socketcan devs!
  *
@@ -29,6 +31,9 @@
 #define USB_GSUSB_1_VENDOR_ID      0x1d50
 #define USB_GSUSB_1_PRODUCT_ID     0x606f
 
+#define USB_CANDLELIGHT_VENDOR_ID  0x1209
+#define USB_CANDLELIGHT_PRODUCT_ID 0x2323
+
 #define GSUSB_ENDPOINT_IN          1
 #define GSUSB_ENDPOINT_OUT         2
 
@@ -952,6 +957,8 @@ static void gs_usb_disconnect(struct usb_interface *intf)
 static const struct usb_device_id gs_usb_table[] = {
 	{ USB_DEVICE_INTERFACE_NUMBER(USB_GSUSB_1_VENDOR_ID,
 				      USB_GSUSB_1_PRODUCT_ID, 0) },
+	{ USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID,
+				      USB_CANDLELIGHT_PRODUCT_ID, 0) },
 	{} /* Terminating entry */
 };
 
@@ -969,5 +976,6 @@ module_usb_driver(gs_usb_driver);
 MODULE_AUTHOR("Maximilian Schneider <mws@schneidersoft.net>");
 MODULE_DESCRIPTION(
 "Socket CAN device driver for Geschwister Schneider Technologie-, "
-"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces.");
+"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n"
+"and bytewerk.org candleLight USB CAN interfaces.");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 022bfa13ebfa..6f1f3b675ff5 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -59,11 +59,14 @@
 #define USB_CAN_R_PRODUCT_ID		39
 #define USB_LEAF_LITE_V2_PRODUCT_ID	288
 #define USB_MINI_PCIE_HS_PRODUCT_ID	289
+#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290
+#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID	291
+#define USB_MINI_PCIE_2HS_PRODUCT_ID	292
 
 static inline bool kvaser_is_leaf(const struct usb_device_id *id)
 {
 	return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
-	       id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID;
+	       id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID;
 }
 
 /* Kvaser USBCan-II devices */
@@ -537,6 +540,9 @@ static const struct usb_device_id kvaser_usb_table[] = {
 		.driver_info = KVASER_HAS_TXRX_ERRORS },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
 
 	/* USBCANII family IDs */
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 30defe6c81f2..821d86c38ab2 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3851,7 +3851,7 @@ static void et131x_tx_timeout(struct net_device *netdev)
 	unsigned long flags;
 
 	/* If the device is closed, ignore the timeout */
-	if (~(adapter->flags & FMP_ADAPTER_INTERRUPT_IN_USE))
+	if (!(adapter->flags & FMP_ADAPTER_INTERRUPT_IN_USE))
 		return;
 
 	/* Any nonrecoverable hardware error?
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1122db..20760e10211a 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	 * on the current MAC's MII bus
 	 */
 	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+		if (mdiobus_get_phy(aup->mii_bus, phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
 			if (!aup->phy_search_highest_addr)
 				/* break out with first one found */
 				break;
diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index d02c4240b7df..8fc93c5f6abc 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -96,10 +96,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct page  *rx_page;
-	unsigned int rx_page_offset;
-	unsigned int rx_frag_size;
-
 	struct napi_struct napi;
 	struct alx_tx_queue txq;
 	struct alx_rx_queue rxq;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c98acdc0d14f..e708e360a9e3 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -70,35 +70,6 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 	}
 }
 
-static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp)
-{
-	struct sk_buff *skb;
-	struct page *page;
-
-	if (alx->rx_frag_size > PAGE_SIZE)
-		return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
-
-	page = alx->rx_page;
-	if (!page) {
-		alx->rx_page = page = alloc_page(gfp);
-		if (unlikely(!page))
-			return NULL;
-		alx->rx_page_offset = 0;
-	}
-
-	skb = build_skb(page_address(page) + alx->rx_page_offset,
-			alx->rx_frag_size);
-	if (likely(skb)) {
-		alx->rx_page_offset += alx->rx_frag_size;
-		if (alx->rx_page_offset >= PAGE_SIZE)
-			alx->rx_page = NULL;
-		else
-			get_page(page);
-	}
-	return skb;
-}
-
-
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
 	struct alx_rx_queue *rxq = &alx->rxq;
@@ -115,9 +86,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	while (!cur_buf->skb && next != rxq->read_idx) {
 		struct alx_rfd *rfd = &rxq->rfd[cur];
 
-		skb = alx_alloc_skb(alx, gfp);
+		/*
+		 * When DMA RX address is set to something like
+		 * 0x....fc0, it will be very likely to cause DMA
+		 * RFD overflow issue.
+		 *
+		 * To work around it, we apply rx skb with 64 bytes
+		 * longer space, and offset the address whenever
+		 * 0x....fc0 is detected.
+		 */
+		skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp);
 		if (!skb)
 			break;
+
+		if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+			skb_reserve(skb, 64);
+
 		dma = dma_map_single(&alx->hw.pdev->dev,
 				     skb->data, alx->rxbuf_size,
 				     DMA_FROM_DEVICE);
@@ -153,7 +137,6 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 		alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur);
 	}
 
-
 	return count;
 }
 
@@ -622,11 +605,6 @@ static void alx_free_rings(struct alx_priv *alx)
 	kfree(alx->txq.bufs);
 	kfree(alx->rxq.bufs);
 
-	if (alx->rx_page) {
-		put_page(alx->rx_page);
-		alx->rx_page = NULL;
-	}
-
 	dma_free_coherent(&alx->hw.pdev->dev,
 			  alx->descmem.size,
 			  alx->descmem.virt,
@@ -681,7 +659,6 @@ static int alx_request_irq(struct alx_priv *alx)
 				  alx->dev->name, alx);
 		if (!err)
 			goto out;
-
 		/* fall back to legacy interrupt */
 		pci_disable_msi(alx->hw.pdev);
 	}
@@ -725,7 +702,6 @@ static int alx_init_sw(struct alx_priv *alx)
 	struct pci_dev *pdev = alx->hw.pdev;
 	struct alx_hw *hw = &alx->hw;
 	int err;
-	unsigned int head_size;
 
 	err = alx_identify_hw(alx);
 	if (err) {
@@ -741,12 +717,7 @@ static int alx_init_sw(struct alx_priv *alx)
 
 	hw->smb_timer = 400;
 	hw->mtu = alx->dev->mtu;
-
 	alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
-
 	alx->tx_ringsz = 256;
 	alx->rx_ringsz = 512;
 	hw->imt = 200;
@@ -848,7 +819,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 	int max_frame = ALX_MAX_FRAME_LEN(mtu);
-	unsigned int head_size;
 
 	if ((max_frame < ALX_MIN_FRAME_SIZE) ||
 	    (max_frame > ALX_MAX_FRAME_SIZE))
@@ -860,9 +830,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	netdev->mtu = mtu;
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
 	netdev_update_features(netdev);
 	if (netif_running(netdev))
 		alx_reinit(alx);
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 08a23e6b60e9..1a3555d03a96 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -259,6 +259,7 @@ static void nb8800_receive(struct net_device *dev, unsigned int i,
 		if (err) {
 			netdev_err(dev, "rx buffer allocation failed\n");
 			dev->stats.rx_dropped++;
+			dev_kfree_skb(skb);
 			return;
 		}
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 543bf38105c9..bfa26a2590c9 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -392,7 +392,7 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 		else
 			p = (char *)priv;
 		p += s->stat_offset;
-		data[i] = *(u32 *)p;
+		data[i] = *(unsigned long *)p;
 	}
 }
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index ee5f431ab32a..25bbae5928d4 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -231,7 +231,7 @@ err_dma:
 	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
 			 DMA_TO_DEVICE);
 
-	while (i > 0) {
+	while (i-- > 0) {
 		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[index];
 		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
@@ -267,15 +267,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	while (ring->start != ring->end) {
 		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
-		u32 ctl1;
+		u32 ctl0, ctl1;
 		int len;
 
 		if (slot_idx == empty_slot)
 			break;
 
+		ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0);
 		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
 		len = ctl1 & BGMAC_DESC_CTL1_LEN;
-		if (ctl1 & BGMAC_DESC_CTL0_SOF)
+		if (ctl0 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
 			dma_unmap_single(dma_dev, slot->dma_addr, len,
 					 DMA_TO_DEVICE);
@@ -1312,7 +1313,8 @@ static int bgmac_open(struct net_device *net_dev)
 
 	phy_start(bgmac->phy_dev);
 
-	netif_carrier_on(net_dev);
+	netif_start_queue(net_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c5fe915870ad..a59d55e25d5f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12895,52 +12895,71 @@ static int __bnx2x_vlan_configure_vid(struct bnx2x *bp, u16 vid, bool add)
 	return rc;
 }
 
-int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp)
+static int bnx2x_vlan_configure_vid_list(struct bnx2x *bp)
 {
 	struct bnx2x_vlan_entry *vlan;
 	int rc = 0;
 
-	if (!bp->vlan_cnt) {
-		DP(NETIF_MSG_IFUP, "No need to re-configure vlan filters\n");
-		return 0;
-	}
-
+	/* Configure all non-configured entries */
 	list_for_each_entry(vlan, &bp->vlan_reg, link) {
-		/* Prepare for cleanup in case of errors */
-		if (rc) {
-			vlan->hw = false;
-			continue;
-		}
-
-		if (!vlan->hw)
+		if (vlan->hw)
 			continue;
 
-		DP(NETIF_MSG_IFUP, "Re-configuring vlan 0x%04x\n", vlan->vid);
+		if (bp->vlan_cnt >= bp->vlan_credit)
+			return -ENOBUFS;
 
 		rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true);
 		if (rc) {
-			BNX2X_ERR("Unable to configure VLAN %d\n", vlan->vid);
-			vlan->hw = false;
-			rc = -EINVAL;
-			continue;
+			BNX2X_ERR("Unable to config VLAN %d\n", vlan->vid);
+			return rc;
 		}
+
+		DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", vlan->vid);
+		vlan->hw = true;
+		bp->vlan_cnt++;
 	}
 
-	return rc;
+	return 0;
+}
+
+static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode)
+{
+	bool need_accept_any_vlan;
+
+	need_accept_any_vlan = !!bnx2x_vlan_configure_vid_list(bp);
+
+	if (bp->accept_any_vlan != need_accept_any_vlan) {
+		bp->accept_any_vlan = need_accept_any_vlan;
+		DP(NETIF_MSG_IFUP, "Accept all VLAN %s\n",
+		   bp->accept_any_vlan ? "raised" : "cleared");
+		if (set_rx_mode) {
+			if (IS_PF(bp))
+				bnx2x_set_rx_mode_inner(bp);
+			else
+				bnx2x_vfpf_storm_rx_mode(bp);
+		}
+	}
+}
+
+int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp)
+{
+	struct bnx2x_vlan_entry *vlan;
+
+	/* The hw forgot all entries after reload */
+	list_for_each_entry(vlan, &bp->vlan_reg, link)
+		vlan->hw = false;
+	bp->vlan_cnt = 0;
+
+	/* Don't set rx mode here. Our caller will do it. */
+	bnx2x_vlan_configure(bp, false);
+
+	return 0;
 }
 
 static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	struct bnx2x_vlan_entry *vlan;
-	bool hw = false;
-	int rc = 0;
-
-	if (!netif_running(bp->dev)) {
-		DP(NETIF_MSG_IFUP,
-		   "Ignoring VLAN configuration the interface is down\n");
-		return -EFAULT;
-	}
 
 	DP(NETIF_MSG_IFUP, "Adding VLAN %d\n", vid);
 
@@ -12948,93 +12967,47 @@ static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 	if (!vlan)
 		return -ENOMEM;
 
-	bp->vlan_cnt++;
-	if (bp->vlan_cnt > bp->vlan_credit && !bp->accept_any_vlan) {
-		DP(NETIF_MSG_IFUP, "Accept all VLAN raised\n");
-		bp->accept_any_vlan = true;
-		if (IS_PF(bp))
-			bnx2x_set_rx_mode_inner(bp);
-		else
-			bnx2x_vfpf_storm_rx_mode(bp);
-	} else if (bp->vlan_cnt <= bp->vlan_credit) {
-		rc = __bnx2x_vlan_configure_vid(bp, vid, true);
-		hw = true;
-	}
-
 	vlan->vid = vid;
-	vlan->hw = hw;
+	vlan->hw = false;
+	list_add_tail(&vlan->link, &bp->vlan_reg);
 
-	if (!rc) {
-		list_add(&vlan->link, &bp->vlan_reg);
-	} else {
-		bp->vlan_cnt--;
-		kfree(vlan);
-	}
-
-	DP(NETIF_MSG_IFUP, "Adding VLAN result %d\n", rc);
+	if (netif_running(dev))
+		bnx2x_vlan_configure(bp, true);
 
-	return rc;
+	return 0;
 }
 
 static int bnx2x_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	struct bnx2x_vlan_entry *vlan;
+	bool found = false;
 	int rc = 0;
 
-	if (!netif_running(bp->dev)) {
-		DP(NETIF_MSG_IFUP,
-		   "Ignoring VLAN configuration the interface is down\n");
-		return -EFAULT;
-	}
-
 	DP(NETIF_MSG_IFUP, "Removing VLAN %d\n", vid);
 
-	if (!bp->vlan_cnt) {
-		BNX2X_ERR("Unable to kill VLAN %d\n", vid);
-		return -EINVAL;
-	}
-
 	list_for_each_entry(vlan, &bp->vlan_reg, link)
-		if (vlan->vid == vid)
+		if (vlan->vid == vid) {
+			found = true;
 			break;
+		}
 
-	if (vlan->vid != vid) {
+	if (!found) {
 		BNX2X_ERR("Unable to kill VLAN %d - not found\n", vid);
 		return -EINVAL;
 	}
 
-	if (vlan->hw)
+	if (netif_running(dev) && vlan->hw) {
 		rc = __bnx2x_vlan_configure_vid(bp, vid, false);
+		DP(NETIF_MSG_IFUP, "HW deconfigured for VLAN %d\n", vid);
+		bp->vlan_cnt--;
+	}
 
 	list_del(&vlan->link);
 	kfree(vlan);
 
-	bp->vlan_cnt--;
-
-	if (bp->vlan_cnt <= bp->vlan_credit && bp->accept_any_vlan) {
-		/* Configure all non-configured entries */
-		list_for_each_entry(vlan, &bp->vlan_reg, link) {
-			if (vlan->hw)
-				continue;
-
-			rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true);
-			if (rc) {
-				BNX2X_ERR("Unable to config VLAN %d\n",
-					  vlan->vid);
-				continue;
-			}
-			DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n",
-			   vlan->vid);
-			vlan->hw = true;
-		}
-		DP(NETIF_MSG_IFUP, "Accept all VLAN Removed\n");
-		bp->accept_any_vlan = false;
-		if (IS_PF(bp))
-			bnx2x_set_rx_mode_inner(bp);
-		else
-			bnx2x_vfpf_storm_rx_mode(bp);
-	}
+	if (netif_running(dev))
+		bnx2x_vlan_configure(bp, true);
 
 	DP(NETIF_MSG_IFUP, "Removing VLAN result %d\n", rc);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 72a2efff8e49..c777cde85ce4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -286,7 +286,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
 		txr->tx_prod = prod;
 
+		tx_buf->is_push = 1;
 		netdev_tx_sent_queue(txq, skb->len);
+		wmb();	/* Sync is_push and byte queue before pushing data */
 
 		push_len = (length + sizeof(*tx_push) + 7) / 8;
 		if (push_len > 16) {
@@ -298,7 +300,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					 push_len);
 		}
 
-		tx_buf->is_push = 1;
 		goto tx_done;
 	}
 
@@ -1112,19 +1113,13 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 	if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
 		skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
 
-	if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) {
-		netdev_features_t features = skb->dev->features;
+	if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) &&
+	    (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 		u16 vlan_proto = tpa_info->metadata >>
 			RX_CMP_FLAGS2_METADATA_TPID_SFT;
+		u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK;
 
-		if (((features & NETIF_F_HW_VLAN_CTAG_RX) &&
-		     vlan_proto == ETH_P_8021Q) ||
-		    ((features & NETIF_F_HW_VLAN_STAG_RX) &&
-		     vlan_proto == ETH_P_8021AD)) {
-			__vlan_hwaccel_put_tag(skb, htons(vlan_proto),
-					       tpa_info->metadata &
-					       RX_CMP_FLAGS2_METADATA_VID_MASK);
-		}
+		__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
 	}
 
 	skb_checksum_none_assert(skb);
@@ -1277,19 +1272,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 
 	skb->protocol = eth_type_trans(skb, dev);
 
-	if (rxcmp1->rx_cmp_flags2 &
-	    cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) {
-		netdev_features_t features = skb->dev->features;
+	if ((rxcmp1->rx_cmp_flags2 &
+	     cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
+	    (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 		u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
+		u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK;
 		u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT;
 
-		if (((features & NETIF_F_HW_VLAN_CTAG_RX) &&
-		     vlan_proto == ETH_P_8021Q) ||
-		    ((features & NETIF_F_HW_VLAN_STAG_RX) &&
-		     vlan_proto == ETH_P_8021AD))
-			__vlan_hwaccel_put_tag(skb, htons(vlan_proto),
-					       meta_data &
-					       RX_CMP_FLAGS2_METADATA_VID_MASK);
+		__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
 	}
 
 	skb_checksum_none_assert(skb);
@@ -5466,6 +5456,20 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
 
 	if (!bnxt_rfs_capable(bp))
 		features &= ~NETIF_F_NTUPLE;
+
+	/* Both CTAG and STAG VLAN accelaration on the RX side have to be
+	 * turned on or off together.
+	 */
+	if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) !=
+	    (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) {
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
+			features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
+				      NETIF_F_HW_VLAN_STAG_RX);
+		else
+			features |= NETIF_F_HW_VLAN_CTAG_RX |
+				    NETIF_F_HW_VLAN_STAG_RX;
+	}
+
 	return features;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a38cb047b540..1b0ae4a72e9e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1591,7 +1591,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev,
 {
 	struct bnxt *bp = netdev_priv(dev);
 	u16  start = eeprom->offset, length = eeprom->len;
-	int rc;
+	int rc = 0;
 
 	memset(data, 0, eeprom->len);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8de79ae63231..0e7e7da8d201 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2821,7 +2821,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (!g) {
 			netif_info(lio, tx_err, lio->netdev,
 				   "Transmit scatter gather: glist null!\n");
-			goto lio_xmit_failed;
+			goto lio_xmit_dma_failed;
 		}
 
 		cmdsetup.s.gather = 1;
@@ -2892,7 +2892,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	else
 		status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
 	if (status == IQ_SEND_FAILED)
-		goto lio_xmit_failed;
+		goto lio_xmit_dma_failed;
 
 	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
 
@@ -2906,12 +2906,13 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	return NETDEV_TX_OK;
 
+lio_xmit_dma_failed:
+	dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
+			 ndata.datasize, DMA_TO_DEVICE);
 lio_xmit_failed:
 	stats->tx_dropped++;
 	netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
 		   iq_no, stats->tx_dropped);
-	dma_unmap_single(&oct->pci_dev->dev, ndata.cmd.dptr,
-			 ndata.datasize, DMA_TO_DEVICE);
 	recv_buffer_free(skb);
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 95f17f8cadac..16ed20357c5c 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -499,6 +499,7 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	u32 rr_quantum;
 	u8 sq_idx = sq->sq_num;
 	u8 pqs_vnic;
+	int svf;
 
 	if (sq->sqs_mode)
 		pqs_vnic = nic->pqs_vf[vnic];
@@ -511,10 +512,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	/* 24 bytes for FCS, IPG and preamble */
 	rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-	tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	if (!sq->sqs_mode) {
+		tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	} else {
+		for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+			if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+				break;
+		}
+		tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
+		tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
+		tl4 += (svf * NIC_TL4_PER_LMAC);
+		tl4 += (bgx * NIC_TL4_PER_BGX);
+	}
 	tl4 += sq_idx;
-	if (sq->sqs_mode)
-		tl4 += vnic * 8;
 
 	tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
 	nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 3ed21988626b..63a39ac97d53 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -551,7 +551,9 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 	}
 
 	/* Clear rcvflt bit (latching high) and read it back */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
+	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT)
+		bgx_reg_modify(bgx, lmacid,
+			       BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
 	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
 		dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
 		if (bgx->use_training) {
@@ -570,13 +572,6 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	/* Wait for MAC RX to be ready */
-	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL,
-			 SMU_RX_CTL_STATUS, true)) {
-		dev_err(&bgx->pdev->dev, "SMU RX link not okay\n");
-		return -1;
-	}
-
 	/* Wait for BGX RX to be idle */
 	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) {
 		dev_err(&bgx->pdev->dev, "SMU RX not idle\n");
@@ -589,29 +584,30 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
-		dev_err(&bgx->pdev->dev, "Receive fault\n");
-		return -1;
-	}
-
-	/* Receive link is latching low. Force it high and verify it */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
-	if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1,
-			 SPU_STATUS1_RCV_LNK, false)) {
-		dev_err(&bgx->pdev->dev, "SPU receive link down\n");
-		return -1;
-	}
-
+	/* Clear receive packet disable */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
 	cfg &= ~SPU_MISC_CTL_RX_DIS;
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-	return 0;
+
+	/* Check for MAC RX faults */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
+	/* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
+	cfg &= SMU_RX_CTL_STATUS;
+	if (!cfg)
+		return 0;
+
+	/* Rx local/remote fault seen.
+	 * Do lmac reinit to see if condition recovers
+	 */
+	bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+
+	return -1;
 }
 
 static void bgx_poll_for_link(struct work_struct *work)
 {
 	struct lmac *lmac;
-	u64 link;
+	u64 spu_link, smu_link;
 
 	lmac = container_of(work, struct lmac, dwork.work);
 
@@ -621,8 +617,11 @@ static void bgx_poll_for_link(struct work_struct *work)
 	bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1,
 		     SPU_STATUS1_RCV_LNK, false);
 
-	link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
-	if (link & SPU_STATUS1_RCV_LNK) {
+	spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
+	smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL);
+
+	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
+	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
 		if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = 40000;
@@ -636,9 +635,15 @@ static void bgx_poll_for_link(struct work_struct *work)
 	}
 
 	if (lmac->last_link != lmac->link_up) {
+		if (lmac->link_up) {
+			if (bgx_xaui_check_link(lmac)) {
+				/* Errors, clear link_up state */
+				lmac->link_up = 0;
+				lmac->last_speed = SPEED_UNKNOWN;
+				lmac->last_duplex = DUPLEX_UNKNOWN;
+			}
+		}
 		lmac->last_link = lmac->link_up;
-		if (lmac->link_up)
-			bgx_xaui_check_link(lmac);
 	}
 
 	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
@@ -710,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 {
 	struct lmac *lmac;
-	u64 cmrx_cfg;
+	u64 cfg;
 
 	lmac = &bgx->lmac[lmacid];
 	if (lmac->check_link) {
@@ -719,9 +724,33 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 		destroy_workqueue(lmac->check_link);
 	}
 
-	cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
-	cmrx_cfg &= ~(1 << 15);
-	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg);
+	/* Disable packet reception */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_RX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Give chance for Rx/Tx FIFO to get drained */
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true);
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true);
+
+	/* Disable packet transmission */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_TX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Disable serdes lanes */
+        if (!lmac->is_sgmii)
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+        else
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN);
+
+	/* Disable LMAC */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
 	if ((bgx->lmac_type != BGX_MODE_XFI) &&
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 149e179363a1..42010d2e5ddf 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -41,6 +41,7 @@
 #define BGX_CMRX_RX_STAT10		0xC0
 #define BGX_CMRX_RX_BP_DROP		0xC8
 #define BGX_CMRX_RX_DMAC_CTL		0x0E8
+#define BGX_CMRX_RX_FIFO_LEN		0x108
 #define BGX_CMR_RX_DMACX_CAM		0x200
 #define  RX_DMACX_CAM_EN			BIT_ULL(48)
 #define  RX_DMACX_CAM_LMACID(x)			(x << 49)
@@ -50,6 +51,7 @@
 #define BGX_CMR_CHAN_MSK_AND		0x450
 #define BGX_CMR_BIST_STATUS		0x460
 #define BGX_CMR_RX_LMACS		0x468
+#define BGX_CMRX_TX_FIFO_LEN		0x518
 #define BGX_CMRX_TX_STAT0		0x600
 #define BGX_CMRX_TX_STAT1		0x608
 #define BGX_CMRX_TX_STAT2		0x610
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index a2cdfc1261dc..50812a1d67bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -144,6 +144,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x5015),	/* T502-bt */
 	CH_PCI_ID_TABLE_FENTRY(0x5016),	/* T580-OCP-SO */
 	CH_PCI_ID_TABLE_FENTRY(0x5017),	/* T520-OCP-SO */
+	CH_PCI_ID_TABLE_FENTRY(0x5018),	/* T540-BT */
 	CH_PCI_ID_TABLE_FENTRY(0x5080),	/* Custom T540-cr */
 	CH_PCI_ID_TABLE_FENTRY(0x5081),	/* Custom T540-LL-cr */
 	CH_PCI_ID_TABLE_FENTRY(0x5082),	/* Custom T504-cr */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
index c4b262ca7d43..2accab386323 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
@@ -36,8 +36,8 @@
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0E
-#define T4FW_VERSION_MICRO 0x04
+#define T4FW_VERSION_MINOR 0x0F
+#define T4FW_VERSION_MICRO 0x25
 #define T4FW_VERSION_BUILD 0x00
 
 #define T4FW_MIN_VERSION_MAJOR 0x01
@@ -45,8 +45,8 @@
 #define T4FW_MIN_VERSION_MICRO 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0E
-#define T5FW_VERSION_MICRO 0x04
+#define T5FW_VERSION_MINOR 0x0F
+#define T5FW_VERSION_MICRO 0x25
 #define T5FW_VERSION_BUILD 0x00
 
 #define T5FW_MIN_VERSION_MAJOR 0x00
@@ -54,8 +54,8 @@
 #define T5FW_MIN_VERSION_MICRO 0x00
 
 #define T6FW_VERSION_MAJOR 0x01
-#define T6FW_VERSION_MINOR 0x0E
-#define T6FW_VERSION_MICRO 0x04
+#define T6FW_VERSION_MINOR 0x0F
+#define T6FW_VERSION_MICRO 0x25
 #define T6FW_VERSION_BUILD 0x00
 
 #define T6FW_MIN_VERSION_MAJOR 0x00
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 41b010645100..4466a1187110 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -860,6 +860,11 @@ static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned int entry;
 	void *dest;
 
+	if (skb_put_padto(skb, ETHOC_ZLEN)) {
+		dev->stats.tx_errors++;
+		goto out_no_free;
+	}
+
 	if (unlikely(skb->len > ETHOC_BUFSIZ)) {
 		dev->stats.tx_errors++;
 		goto out;
@@ -894,6 +899,7 @@ static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_tx_timestamp(skb);
 out:
 	dev_kfree_skb(skb);
+out_no_free:
 	return NETDEV_TX_OK;
 }
 
@@ -1086,7 +1092,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	if (!priv->iobase) {
 		dev_err(&pdev->dev, "cannot remap I/O memory space\n");
 		ret = -ENXIO;
-		goto error;
+		goto free;
 	}
 
 	if (netdev->mem_end) {
@@ -1095,7 +1101,7 @@ static int ethoc_probe(struct platform_device *pdev)
 		if (!priv->membase) {
 			dev_err(&pdev->dev, "cannot remap memory space\n");
 			ret = -ENXIO;
-			goto error;
+			goto free;
 		}
 	} else {
 		/* Allocate buffer memory */
@@ -1106,7 +1112,7 @@ static int ethoc_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "cannot allocate %dB buffer\n",
 				buffer_size);
 			ret = -ENOMEM;
-			goto error;
+			goto free;
 		}
 		netdev->mem_end = netdev->mem_start + buffer_size;
 		priv->dma_alloc = buffer_size;
@@ -1120,7 +1126,7 @@ static int ethoc_probe(struct platform_device *pdev)
 		128, (netdev->mem_end - netdev->mem_start + 1) / ETHOC_BUFSIZ);
 	if (num_bd < 4) {
 		ret = -ENODEV;
-		goto error;
+		goto free;
 	}
 	priv->num_bd = num_bd;
 	/* num_tx must be a power of two */
@@ -1133,7 +1139,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	priv->vma = devm_kzalloc(&pdev->dev, num_bd*sizeof(void *), GFP_KERNEL);
 	if (!priv->vma) {
 		ret = -ENOMEM;
-		goto error;
+		goto free;
 	}
 
 	/* Allow the platform setup code to pass in a MAC address. */
@@ -1195,7 +1201,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	priv->mdio = mdiobus_alloc();
 	if (!priv->mdio) {
 		ret = -ENOMEM;
-		goto free;
+		goto free2;
 	}
 
 	priv->mdio->name = "ethoc-mdio";
@@ -1208,7 +1214,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	ret = mdiobus_register(priv->mdio);
 	if (ret) {
 		dev_err(&netdev->dev, "failed to register MDIO bus\n");
-		goto free;
+		goto free2;
 	}
 
 	ret = ethoc_mdio_probe(netdev);
@@ -1241,9 +1247,10 @@ error2:
 error:
 	mdiobus_unregister(priv->mdio);
 	mdiobus_free(priv->mdio);
-free:
+free2:
 	if (priv->clk)
 		clk_disable_unprepare(priv->clk);
+free:
 	free_netdev(netdev);
 out:
 	return ret;
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 06f031715b57..9b7a3f5a2818 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -285,6 +285,7 @@ static void nps_enet_hw_reset(struct net_device *ndev)
 	ge_rst_value |= NPS_ENET_ENABLE << RST_GMAC_0_SHIFT;
 	nps_enet_reg_set(priv, NPS_ENET_REG_GE_RST, ge_rst_value);
 	usleep_range(10, 20);
+	ge_rst_value = 0;
 	nps_enet_reg_set(priv, NPS_ENET_REG_GE_RST, ge_rst_value);
 
 	/* Tx fifo reset sequence */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3c0255e98535..fea0f330ddbd 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2416,24 +2416,24 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
 		return -EOPNOTSUPP;
 
 	if (ec->rx_max_coalesced_frames > 255) {
-		pr_err("Rx coalesced frames exceed hardware limiation");
+		pr_err("Rx coalesced frames exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	if (ec->tx_max_coalesced_frames > 255) {
-		pr_err("Tx coalesced frame exceed hardware limiation");
+		pr_err("Tx coalesced frame exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesed usec exceeed hardware limiation");
+		pr_err("Rx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesed usec exceeed hardware limiation");
+		pr_err("Rx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 7615e0668acb..2e6785b6e8be 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2440,7 +2440,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 						 tx_queue->tx_ring_size);
 
 	if (likely(!nr_frags)) {
-		lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+		if (likely(!do_tstamp))
+			lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
 	} else {
 		u32 lstatus_start = lstatus;
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 864cb21351a4..88f3c85fb04a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -75,6 +75,7 @@
 #include <linux/uaccess.h>
 #include <asm/firmware.h>
 #include <linux/seq_file.h>
+#include <linux/workqueue.h>
 
 #include "ibmvnic.h"
 
@@ -89,6 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
 static int ibmvnic_remove(struct vio_dev *);
 static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -469,7 +471,8 @@ static int ibmvnic_open(struct net_device *netdev)
 	crq.logical_link_state.link_state = IBMVNIC_LOGICAL_LNK_UP;
 	ibmvnic_send_crq(adapter, &crq);
 
-	netif_start_queue(netdev);
+	netif_tx_start_all_queues(netdev);
+
 	return 0;
 
 bounce_map_failed:
@@ -519,7 +522,7 @@ static int ibmvnic_close(struct net_device *netdev)
 	for (i = 0; i < adapter->req_rx_queues; i++)
 		napi_disable(&adapter->napi[i]);
 
-	netif_stop_queue(netdev);
+	netif_tx_stop_all_queues(netdev);
 
 	if (adapter->bounce_buffer) {
 		if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
@@ -1212,12 +1215,6 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 		goto reg_failed;
 	}
 
-	scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
-	if (scrq->irq == NO_IRQ) {
-		dev_err(dev, "Error mapping irq\n");
-		goto map_irq_failed;
-	}
-
 	scrq->adapter = adapter;
 	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
 	scrq->cur = 0;
@@ -1230,12 +1227,6 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	return scrq;
 
-map_irq_failed:
-	do {
-		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
-					adapter->vdev->unit_address,
-					scrq->crq_num);
-	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 reg_failed:
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
@@ -1256,6 +1247,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 			if (adapter->tx_scrq[i]) {
 				free_irq(adapter->tx_scrq[i]->irq,
 					 adapter->tx_scrq[i]);
+				irq_dispose_mapping(adapter->tx_scrq[i]->irq);
 				release_sub_crq_queue(adapter,
 						      adapter->tx_scrq[i]);
 			}
@@ -1267,6 +1259,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 			if (adapter->rx_scrq[i]) {
 				free_irq(adapter->rx_scrq[i]->irq,
 					 adapter->rx_scrq[i]);
+				irq_dispose_mapping(adapter->rx_scrq[i]->irq);
 				release_sub_crq_queue(adapter,
 						      adapter->rx_scrq[i]);
 			}
@@ -1276,6 +1269,29 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 	adapter->requested_caps = 0;
 }
 
+static void release_sub_crqs_no_irqs(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (adapter->tx_scrq) {
+		for (i = 0; i < adapter->req_tx_queues; i++)
+			if (adapter->tx_scrq[i])
+				release_sub_crq_queue(adapter,
+						      adapter->tx_scrq[i]);
+		adapter->tx_scrq = NULL;
+	}
+
+	if (adapter->rx_scrq) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			if (adapter->rx_scrq[i])
+				release_sub_crq_queue(adapter,
+						      adapter->rx_scrq[i]);
+		adapter->rx_scrq = NULL;
+	}
+
+	adapter->requested_caps = 0;
+}
+
 static int disable_scrq_irq(struct ibmvnic_adapter *adapter,
 			    struct ibmvnic_sub_crq_queue *scrq)
 {
@@ -1395,6 +1411,66 @@ static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
 	return IRQ_HANDLED;
 }
 
+static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_sub_crq_queue *scrq;
+	int i = 0, j = 0;
+	int rc = 0;
+
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		scrq = adapter->tx_scrq[i];
+		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
+
+		if (scrq->irq == NO_IRQ) {
+			rc = -EINVAL;
+			dev_err(dev, "Error mapping irq\n");
+			goto req_tx_irq_failed;
+		}
+
+		rc = request_irq(scrq->irq, ibmvnic_interrupt_tx,
+				 0, "ibmvnic_tx", scrq);
+
+		if (rc) {
+			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
+				scrq->irq, rc);
+			irq_dispose_mapping(scrq->irq);
+			goto req_rx_irq_failed;
+		}
+	}
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		scrq = adapter->rx_scrq[i];
+		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
+		if (scrq->irq == NO_IRQ) {
+			rc = -EINVAL;
+			dev_err(dev, "Error mapping irq\n");
+			goto req_rx_irq_failed;
+		}
+		rc = request_irq(scrq->irq, ibmvnic_interrupt_rx,
+				 0, "ibmvnic_rx", scrq);
+		if (rc) {
+			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
+				scrq->irq, rc);
+			irq_dispose_mapping(scrq->irq);
+			goto req_rx_irq_failed;
+		}
+	}
+	return rc;
+
+req_rx_irq_failed:
+	for (j = 0; j < i; j++)
+		free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
+		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+	i = adapter->req_tx_queues;
+req_tx_irq_failed:
+	for (j = 0; j < i; j++)
+		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
+		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+	release_sub_crqs_no_irqs(adapter);
+	return rc;
+}
+
 static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 {
 	struct device *dev = &adapter->vdev->dev;
@@ -1403,8 +1479,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	union ibmvnic_crq crq;
 	int total_queues;
 	int more = 0;
-	int i, j;
-	int rc;
+	int i;
 
 	if (!retry) {
 		/* Sub-CRQ entries are 32 byte long */
@@ -1483,13 +1558,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	for (i = 0; i < adapter->req_tx_queues; i++) {
 		adapter->tx_scrq[i] = allqueues[i];
 		adapter->tx_scrq[i]->pool_index = i;
-		rc = request_irq(adapter->tx_scrq[i]->irq, ibmvnic_interrupt_tx,
-				 0, "ibmvnic_tx", adapter->tx_scrq[i]);
-		if (rc) {
-			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
-				adapter->tx_scrq[i]->irq, rc);
-			goto req_tx_irq_failed;
-		}
 	}
 
 	adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -1500,13 +1568,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
 		adapter->rx_scrq[i]->scrq_num = i;
-		rc = request_irq(adapter->rx_scrq[i]->irq, ibmvnic_interrupt_rx,
-				 0, "ibmvnic_rx", adapter->rx_scrq[i]);
-		if (rc) {
-			dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
-				adapter->rx_scrq[i]->irq, rc);
-			goto req_rx_irq_failed;
-		}
 	}
 
 	memset(&crq, 0, sizeof(crq));
@@ -1559,15 +1620,6 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
 
 	return;
 
-req_rx_irq_failed:
-	for (j = 0; j < i; j++)
-		free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
-	i = adapter->req_tx_queues;
-req_tx_irq_failed:
-	for (j = 0; j < i; j++)
-		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
-	kfree(adapter->rx_scrq);
-	adapter->rx_scrq = NULL;
 rx_failed:
 	kfree(adapter->tx_scrq);
 	adapter->tx_scrq = NULL;
@@ -2121,7 +2173,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 				  struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp;
 	unsigned long flags;
 	bool found = false;
 	int i;
@@ -2133,7 +2185,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 	}
 
 	spin_lock_irqsave(&adapter->error_list_lock, flags);
-	list_for_each_entry(error_buff, &adapter->errors, list)
+	list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list)
 		if (error_buff->error_id == crq->request_error_rsp.error_id) {
 			found = true;
 			list_del(&error_buff->list);
@@ -2348,9 +2400,9 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
 			 *req_value,
 			 (long int)be32_to_cpu(crq->request_capability_rsp.
 					       number), name);
-		release_sub_crqs(adapter);
+		release_sub_crqs_no_irqs(adapter);
 		*req_value = be32_to_cpu(crq->request_capability_rsp.number);
-		complete(&adapter->init_done);
+		init_sub_crqs(adapter, 1);
 		return;
 	default:
 		dev_err(dev, "Error %d in request cap rsp\n",
@@ -2659,7 +2711,7 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
 
 out:
 	if (atomic_read(&adapter->running_cap_queries) == 0)
-		complete(&adapter->init_done);
+		init_sub_crqs(adapter, 0);
 		/* We're done querying the capabilities, initialize sub-crqs */
 }
 
@@ -3141,14 +3193,14 @@ static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq,
 
 static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_inflight_cmd *inflight_cmd;
+	struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1;
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp2;
 	unsigned long flags;
 	unsigned long flags2;
 
 	spin_lock_irqsave(&adapter->inflight_lock, flags);
-	list_for_each_entry(inflight_cmd, &adapter->inflight, list) {
+	list_for_each_entry_safe(inflight_cmd, tmp1, &adapter->inflight, list) {
 		switch (inflight_cmd->crq.generic.cmd) {
 		case LOGIN:
 			dma_unmap_single(dev, adapter->login_buf_token,
@@ -3165,8 +3217,8 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 			break;
 		case REQUEST_ERROR_INFO:
 			spin_lock_irqsave(&adapter->error_list_lock, flags2);
-			list_for_each_entry(error_buff, &adapter->errors,
-					    list) {
+			list_for_each_entry_safe(error_buff, tmp2,
+						 &adapter->errors, list) {
 				dma_unmap_single(dev, error_buff->dma,
 						 error_buff->len,
 						 DMA_FROM_DEVICE);
@@ -3202,8 +3254,8 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			dev_info(dev, "Partner initialized\n");
 			/* Send back a response */
 			rc = ibmvnic_send_crq_init_complete(adapter);
-			if (rc == 0)
-				send_version_xchg(adapter);
+			if (!rc)
+				schedule_work(&adapter->vnic_crq_init);
 			else
 				dev_err(dev, "Can't send initrsp rc=%ld\n", rc);
 			break;
@@ -3555,8 +3607,63 @@ static const struct file_operations ibmvnic_dump_ops = {
 	.release        = single_release,
 };
 
+static void handle_crq_init_rsp(struct work_struct *work)
+{
+	struct ibmvnic_adapter *adapter = container_of(work,
+						       struct ibmvnic_adapter,
+						       vnic_crq_init);
+	struct device *dev = &adapter->vdev->dev;
+	struct net_device *netdev = adapter->netdev;
+	unsigned long timeout = msecs_to_jiffies(30000);
+	int rc;
+
+	send_version_xchg(adapter);
+	reinit_completion(&adapter->init_done);
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+		dev_err(dev, "Passive init timeout\n");
+		goto task_failed;
+	}
+
+	do {
+		if (adapter->renegotiate) {
+			adapter->renegotiate = false;
+			release_sub_crqs_no_irqs(adapter);
+			send_cap_queries(adapter);
+
+			reinit_completion(&adapter->init_done);
+			if (!wait_for_completion_timeout(&adapter->init_done,
+							 timeout)) {
+				dev_err(dev, "Passive init timeout\n");
+				goto task_failed;
+			}
+		}
+	} while (adapter->renegotiate);
+	rc = init_sub_crq_irqs(adapter);
+
+	if (rc)
+		goto task_failed;
+
+	netdev->real_num_tx_queues = adapter->req_tx_queues;
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		dev_err(dev,
+			"failed to register netdev rc=%d\n", rc);
+		goto register_failed;
+	}
+	dev_info(dev, "ibmvnic registered\n");
+
+	return;
+
+register_failed:
+	release_sub_crqs(adapter);
+task_failed:
+	dev_err(dev, "Passive initialization was not successful\n");
+}
+
 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 {
+	unsigned long timeout = msecs_to_jiffies(30000);
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
 	unsigned char *mac_addr_p;
@@ -3593,6 +3700,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
+	INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
+
 	spin_lock_init(&adapter->stats_lock);
 
 	rc = ibmvnic_init_crq_queue(adapter);
@@ -3635,30 +3744,26 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	ibmvnic_send_crq_init(adapter);
 
 	init_completion(&adapter->init_done);
-	wait_for_completion(&adapter->init_done);
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout))
+		return 0;
 
 	do {
-		adapter->renegotiate = false;
-
-		init_sub_crqs(adapter, 0);
-		reinit_completion(&adapter->init_done);
-		wait_for_completion(&adapter->init_done);
-
 		if (adapter->renegotiate) {
-			release_sub_crqs(adapter);
+			adapter->renegotiate = false;
+			release_sub_crqs_no_irqs(adapter);
 			send_cap_queries(adapter);
 
 			reinit_completion(&adapter->init_done);
-			wait_for_completion(&adapter->init_done);
+			if (!wait_for_completion_timeout(&adapter->init_done,
+							 timeout))
+				return 0;
 		}
 	} while (adapter->renegotiate);
 
-	/* if init_sub_crqs is partially successful, retry */
-	while (!adapter->tx_scrq || !adapter->rx_scrq) {
-		init_sub_crqs(adapter, 1);
-
-		reinit_completion(&adapter->init_done);
-		wait_for_completion(&adapter->init_done);
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(&dev->dev, "failed to initialize sub crq irqs\n");
+		goto free_debugfs;
 	}
 
 	netdev->real_num_tx_queues = adapter->req_tx_queues;
@@ -3666,12 +3771,14 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	rc = register_netdev(netdev);
 	if (rc) {
 		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
-		goto free_debugfs;
+		goto free_sub_crqs;
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
 	return 0;
 
+free_sub_crqs:
+	release_sub_crqs(adapter);
 free_debugfs:
 	if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
 		debugfs_remove_recursive(adapter->debugfs_dir);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 0b66a506a4e4..e82898fd518e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1045,4 +1045,6 @@ struct ibmvnic_adapter {
 	u64 opt_rxba_entries_per_subcrq;
 	__be64 tx_rx_desc_req;
 	u8 map_id;
+
+	struct work_struct vnic_crq_init;
 };
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 75e60897b7e7..2b2e2f8c6369 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2789,7 +2789,7 @@ static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter)
 }
 
 /**
- * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping
+ * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping
  * @adapter: board private structure to initialize
  **/
 static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter)
@@ -6915,6 +6915,14 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev,
 	if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN))
 		features &= ~NETIF_F_RXFCS;
 
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
 	return features;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5ea22008d721..501f15d9f4d6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1344,6 +1344,13 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 	if (!vsi || !macaddr)
 		return NULL;
 
+	/* Do not allow broadcast filter to be added since broadcast filter
+	 * is added as part of add VSI for any newly created VSI except
+	 * FDIR VSI
+	 */
+	if (is_broadcast_ether_addr(macaddr))
+		return NULL;
+
 	f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
 	if (!f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
@@ -2151,18 +2158,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 					 aq_ret, pf->hw.aq.asq_last_status);
 			}
 		}
-		aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw,
-						   vsi->seid,
-						   cur_promisc, NULL);
-		if (aq_ret) {
-			retval = i40e_aq_rc_to_posix(aq_ret,
-						     pf->hw.aq.asq_last_status);
-			dev_info(&pf->pdev->dev,
-				 "set brdcast promisc failed, err %s, aq_err %s\n",
-				 i40e_stat_str(&pf->hw, aq_ret),
-				 i40e_aq_str(&pf->hw,
-					     pf->hw.aq.asq_last_status));
-		}
 	}
 out:
 	/* if something went wrong then set the changed flag so we try again */
@@ -7726,10 +7721,11 @@ static int i40e_init_msix(struct i40e_pf *pf)
  * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
  * @vsi: the VSI being configured
  * @v_idx: index of the vector in the vsi struct
+ * @cpu: cpu to be used on affinity_mask
  *
  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
  **/
-static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
 {
 	struct i40e_q_vector *q_vector;
 
@@ -7740,7 +7736,8 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 
 	q_vector->vsi = vsi;
 	q_vector->v_idx = v_idx;
-	cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+	cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+
 	if (vsi->netdev)
 		netif_napi_add(vsi->netdev, &q_vector->napi,
 			       i40e_napi_poll, NAPI_POLL_WEIGHT);
@@ -7764,8 +7761,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
 static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
-	int v_idx, num_q_vectors;
-	int err;
+	int err, v_idx, num_q_vectors, current_cpu;
 
 	/* if not MSIX, give the one vector only to the LAN VSI */
 	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
@@ -7775,10 +7771,15 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
 	else
 		return -EINVAL;
 
+	current_cpu = cpumask_first(cpu_online_mask);
+
 	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
+		err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu);
 		if (err)
 			goto err_out;
+		current_cpu = cpumask_next(current_cpu, cpu_online_mask);
+		if (unlikely(current_cpu >= nr_cpu_ids))
+			current_cpu = cpumask_first(cpu_online_mask);
 	}
 
 	return 0;
@@ -9224,6 +9225,7 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
 static int i40e_add_vsi(struct i40e_vsi *vsi)
 {
 	int ret = -ENODEV;
+	i40e_status aq_ret = 0;
 	u8 laa_macaddr[ETH_ALEN];
 	bool found_laa_mac_filter = false;
 	struct i40e_pf *pf = vsi->back;
@@ -9413,6 +9415,18 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		vsi->seid = ctxt.seid;
 		vsi->id = ctxt.vsi_number;
 	}
+	/* Except FDIR VSI, for all othet VSI set the broadcast filter */
+	if (vsi->type != I40E_VSI_FDIR) {
+		aq_ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL);
+		if (aq_ret) {
+			ret = i40e_aq_rc_to_posix(aq_ret,
+						  hw->aq.asq_last_status);
+			dev_info(&pf->pdev->dev,
+				 "set brdcast promisc failed, err %s, aq_err %s\n",
+				 i40e_stat_str(hw, aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+	}
 
 	spin_lock_bh(&vsi->mac_filter_list_lock);
 	/* If macvlan filters already exist, force them to get loaded */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 55f151fca1dc..a8868e1bf832 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1280,8 +1280,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 				    union i40e_rx_desc *rx_desc)
 {
 	struct i40e_rx_ptype_decoded decoded;
-	bool ipv4, ipv6, tunnel = false;
 	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
 	u8 ptype;
 	u64 qword;
 
@@ -1336,19 +1336,23 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
 		return;
 
-	/* The hardware supported by this driver does not validate outer
-	 * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
-	 * with it but the specification states that you "MAY validate", it
-	 * doesn't make it a hard requirement so if we have validated the
-	 * inner checksum report CHECKSUM_UNNECESSARY.
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
 	 */
-	if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
-				  I40E_RX_PTYPE_INNER_PROT_UDP |
-				  I40E_RX_PTYPE_INNER_PROT_SCTP))
-		tunnel = true;
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	skb->csum_level = tunnel ? 1 : 0;
+	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		/* fall though */
+	default:
+		break;
+	}
 
 	return;
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index be99189da925..79d99cd91b24 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -752,8 +752,8 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 				    union i40e_rx_desc *rx_desc)
 {
 	struct i40e_rx_ptype_decoded decoded;
-	bool ipv4, ipv6, tunnel = false;
 	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
 	u8 ptype;
 	u64 qword;
 
@@ -808,19 +808,23 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
 		return;
 
-	/* The hardware supported by this driver does not validate outer
-	 * checksums for tunneled VXLAN or GENEVE frames.  I don't agree
-	 * with it but the specification states that you "MAY validate", it
-	 * doesn't make it a hard requirement so if we have validated the
-	 * inner checksum report CHECKSUM_UNNECESSARY.
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
 	 */
-	if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
-				  I40E_RX_PTYPE_INNER_PROT_UDP |
-				  I40E_RX_PTYPE_INNER_PROT_SCTP))
-		tunnel = true;
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-	skb->csum_level = tunnel ? 1 : 0;
+	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		/* fall though */
+	default:
+		break;
+	}
 
 	return;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 088c47cf27d9..8bebd862a54c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2887,7 +2887,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
 	if (!test_bit(__IXGBE_DOWN, &adapter->state))
 		ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx));
 
-	return 0;
+	return min(work_done, budget - 1);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index 61a80da8b6f0..2819abc454c7 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -85,7 +85,7 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw)
 static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val = -IXGBE_ERR_MBX;
+	s32 ret_val = IXGBE_ERR_MBX;
 
 	if (!mbx->ops.read)
 		goto out;
@@ -111,7 +111,7 @@ out:
 static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
 {
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
-	s32 ret_val = -IXGBE_ERR_MBX;
+	s32 ret_val = IXGBE_ERR_MBX;
 
 	/* exit if either we can't write or there isn't a defined timeout */
 	if (!mbx->ops.write || !mbx->timeout)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index a6d26d351dfc..f92018b13d28 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -244,7 +244,7 @@
 /* Various constants */
 
 /* Coalescing */
-#define MVNETA_TXDONE_COAL_PKTS		1
+#define MVNETA_TXDONE_COAL_PKTS		0	/* interrupt per packet */
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
@@ -3458,6 +3458,8 @@ static int mvneta_open(struct net_device *dev)
 	return 0;
 
 err_free_irq:
+	unregister_cpu_notifier(&pp->cpu_notifier);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(pp->dev->irq, pp->ports);
 err_cleanup_txqs:
 	mvneta_cleanup_txqs(pp);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index c984462fad2a..d1cdc2d76151 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -133,6 +133,8 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
+	u16 lcl_adv = 0, rmt_adv = 0;
+	u8 flowctrl;
 	u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG |
 		  MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN |
 		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
@@ -150,11 +152,30 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 	if (mac->phy_dev->link)
 		mcr |= MAC_MCR_FORCE_LINK;
 
-	if (mac->phy_dev->duplex)
+	if (mac->phy_dev->duplex) {
 		mcr |= MAC_MCR_FORCE_DPX;
 
-	if (mac->phy_dev->pause)
-		mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC;
+		if (mac->phy_dev->pause)
+			rmt_adv = LPA_PAUSE_CAP;
+		if (mac->phy_dev->asym_pause)
+			rmt_adv |= LPA_PAUSE_ASYM;
+
+		if (mac->phy_dev->advertising & ADVERTISED_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_CAP;
+		if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+
+		if (flowctrl & FLOW_CTRL_TX)
+			mcr |= MAC_MCR_FORCE_TX_FC;
+		if (flowctrl & FLOW_CTRL_RX)
+			mcr |= MAC_MCR_FORCE_RX_FC;
+
+		netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n",
+			  flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled",
+			  flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled");
+	}
 
 	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
 
@@ -208,10 +229,16 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	u32 val, ge_mode;
 
 	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
+	if (!np && of_phy_is_fixed_link(mac->of_node))
+		if (!of_phy_register_fixed_link(mac->of_node))
+			np = of_node_get(mac->of_node);
 	if (!np)
 		return -ENODEV;
 
 	switch (of_get_phy_mode(np)) {
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII:
 		ge_mode = 0;
 		break;
@@ -236,7 +263,8 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	mac->phy_dev->autoneg = AUTONEG_ENABLE;
 	mac->phy_dev->speed = 0;
 	mac->phy_dev->duplex = 0;
-	mac->phy_dev->supported &= PHY_BASIC_FEATURES;
+	mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+				   SUPPORTED_Asym_Pause;
 	mac->phy_dev->advertising = mac->phy_dev->supported |
 				    ADVERTISED_Autoneg;
 	phy_start_aneg(mac->phy_dev);
@@ -280,7 +308,7 @@ static int mtk_mdio_init(struct mtk_eth *eth)
 	return 0;
 
 err_free_bus:
-	kfree(eth->mii_bus);
+	mdiobus_free(eth->mii_bus);
 
 err_put_node:
 	of_node_put(mii_np);
@@ -295,7 +323,7 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth)
 
 	mdiobus_unregister(eth->mii_bus);
 	of_node_put(eth->mii_bus->dev.of_node);
-	kfree(eth->mii_bus);
+	mdiobus_free(eth->mii_bus);
 }
 
 static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
@@ -453,20 +481,23 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
 /* the qdma core needs scratch memory to be setup */
 static int mtk_init_fq_dma(struct mtk_eth *eth)
 {
-	dma_addr_t phy_ring_head, phy_ring_tail;
+	dma_addr_t phy_ring_tail;
 	int cnt = MTK_DMA_SIZE;
 	dma_addr_t dma_addr;
 	int i;
 
 	eth->scratch_ring = dma_alloc_coherent(eth->dev,
 					       cnt * sizeof(struct mtk_tx_dma),
-					       &phy_ring_head,
+					       &eth->phy_scratch_ring,
 					       GFP_ATOMIC | __GFP_ZERO);
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
 
 	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
 				    GFP_KERNEL);
+	if (unlikely(!eth->scratch_head))
+		return -ENOMEM;
+
 	dma_addr = dma_map_single(eth->dev,
 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
 				  DMA_FROM_DEVICE);
@@ -474,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 		return -ENOMEM;
 
 	memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
-	phy_ring_tail = phy_ring_head +
+	phy_ring_tail = eth->phy_scratch_ring +
 			(sizeof(struct mtk_tx_dma) * (cnt - 1));
 
 	for (i = 0; i < cnt; i++) {
 		eth->scratch_ring[i].txd1 =
 					(dma_addr + (i * MTK_QDMA_PAGE_SIZE));
 		if (i < cnt - 1)
-			eth->scratch_ring[i].txd2 = (phy_ring_head +
+			eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring +
 				((i + 1) * sizeof(struct mtk_tx_dma)));
 		eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
 	}
 
-	mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+	mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD);
 	mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
 	mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
 	mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
@@ -643,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 err_dma:
 	do {
-		tx_buf = mtk_desc_to_tx_buf(ring, txd);
+		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
 		mtk_tx_unmap(&dev->dev, tx_buf);
@@ -673,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static int mtk_queue_stopped(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		if (netif_queue_stopped(eth->netdev[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
 static void mtk_wake_queue(struct mtk_eth *eth)
 {
 	int i;
@@ -738,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
 		goto drop;
 
-	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
-		if (unlikely(atomic_read(&ring->free_count) >
-			     ring->thresh))
-			mtk_wake_queue(eth);
-	}
+
 	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;
@@ -798,6 +840,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 					  DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
 			skb_free_frag(new_data);
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 
@@ -805,6 +848,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
 			put_page(virt_to_head_page(new_data));
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
@@ -893,7 +937,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 		}
 		mtk_tx_unmap(eth->dev, tx_buf);
 
-		ring->last_free->txd2 = next_cpu;
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
 
@@ -918,7 +961,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 	if (!total)
 		return 0;
 
-	if (atomic_read(&ring->free_count) > ring->thresh)
+	if (mtk_queue_stopped(eth) &&
+	    (atomic_read(&ring->free_count) > ring->thresh))
 		mtk_wake_queue(eth);
 
 	return total;
@@ -999,9 +1043,8 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
-	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
-	ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
-			      MAX_SKB_FRAGS);
+	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
+	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
@@ -1179,6 +1222,14 @@ static void mtk_dma_free(struct mtk_eth *eth)
 	for (i = 0; i < MTK_MAC_COUNT; i++)
 		if (eth->netdev[i])
 			netdev_reset_queue(eth->netdev[i]);
+	if (eth->scratch_ring) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+				  eth->scratch_ring,
+				  eth->phy_scratch_ring);
+		eth->scratch_ring = NULL;
+		eth->phy_scratch_ring = 0;
+	}
 	mtk_tx_clean(eth);
 	mtk_rx_clean(eth);
 	kfree(eth->scratch_head);
@@ -1241,7 +1292,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	mtk_w32(eth,
 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS,
+		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
 	return 0;
@@ -1355,7 +1406,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 
@@ -1669,7 +1720,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-	eth->netdev[id]->watchdog_timeo = HZ;
+	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index eed626d56ea4..a5eb7c62306b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -91,6 +91,7 @@
 #define MTK_QDMA_GLO_CFG	0x1A04
 #define MTK_RX_2B_OFFSET	BIT(31)
 #define MTK_RX_BT_32DWORDS	(3 << 11)
+#define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
 #define MTK_DMA_SIZE_16DWORDS	(2 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)
@@ -357,6 +358,7 @@ struct mtk_rx_ring {
  * @rx_ring:		Pointer to the memore holding info about the RX ring
  * @rx_napi:		The NAPI struct
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
+ * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
  * @clk_ethif:		The ethif clock
  * @clk_esw:		The switch clock
@@ -384,6 +386,7 @@ struct mtk_eth {
 	struct mtk_rx_ring		rx_ring;
 	struct napi_struct		rx_napi;
 	struct mtk_tx_dma		*scratch_ring;
+	dma_addr_t			phy_scratch_ring;
 	void				*scratch_head;
 	struct clk			*clk_ethif;
 	struct clk			*clk_esw;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e94ca1c3fc7c..f04a423ff79d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
 	priv->cmd.free_head = 0;
 
 	sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
-	spin_lock_init(&priv->cmd.context_lock);
 
 	for (priv->cmd.token_mask = 1;
 	     priv->cmd.token_mask < priv->cmd.max_cmds;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index fc95affaf76b..44cf16d01f42 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1042,6 +1042,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	u32 rx_size, tx_size;
 	int port_up = 0;
 	int err = 0;
@@ -1061,22 +1063,25 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 	    tx_size == priv->tx_ring[0]->size)
 		return 0;
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.tx_ring_size = tx_size;
+	new_prof.rx_ring_size = rx_size;
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
-	priv->prof->tx_ring_size = tx_size;
-	priv->prof->rx_ring_size = rx_size;
+	mlx4_en_safe_replace_resources(priv, tmp);
 
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
 	if (port_up) {
 		err = mlx4_en_start_port(dev);
 		if (err)
@@ -1084,8 +1089,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 	}
 
 	err = mlx4_en_moderation_update(priv);
-
 out:
+	kfree(tmp);
 	mutex_unlock(&mdev->state_lock);
 	return err;
 }
@@ -1714,6 +1719,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	int port_up = 0;
 	int err = 0;
 
@@ -1723,23 +1730,26 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	    !channel->tx_count || !channel->rx_count)
 		return -EINVAL;
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.num_tx_rings_p_up = channel->tx_count;
+	new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
+	new_prof.rx_ring_num = channel->rx_count;
+
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
-	priv->num_tx_rings_p_up = channel->tx_count;
-	priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
-	priv->rx_ring_num = channel->rx_count;
-
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
+	mlx4_en_safe_replace_resources(priv, tmp);
 
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
@@ -1757,8 +1767,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	}
 
 	err = mlx4_en_moderation_update(priv);
-
 out:
+	kfree(tmp);
 	mutex_unlock(&mdev->state_lock);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 19ceced6736c..8359e9e51b3b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -406,14 +406,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev,
 	mutex_lock(&mdev->state_lock);
 	if (mdev->device_up && priv->port_up) {
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
-		if (err)
+		if (err) {
 			en_err(priv, "Failed configuring VLAN filter\n");
+			goto out;
+		}
 	}
-	if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx))
-		en_dbg(HW, priv, "failed adding vlan %d\n", vid);
-	mutex_unlock(&mdev->state_lock);
+	err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx);
+	if (err)
+		en_dbg(HW, priv, "Failed adding vlan %d\n", vid);
 
-	return 0;
+out:
+	mutex_unlock(&mdev->state_lock);
+	return err;
 }
 
 static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
@@ -421,7 +425,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int err;
+	int err = 0;
 
 	en_dbg(HW, priv, "Killing VID:%d\n", vid);
 
@@ -438,7 +442,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 	}
 	mutex_unlock(&mdev->state_lock);
 
-	return 0;
+	return err;
 }
 
 static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
@@ -1950,7 +1954,7 @@ static int mlx4_en_close(struct net_device *dev)
 	return 0;
 }
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv)
+static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 {
 	int i;
 
@@ -1975,7 +1979,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 
 }
 
-int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
+static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
@@ -2032,11 +2036,91 @@ err:
 	return -ENOMEM;
 }
 
+static void mlx4_en_shutdown(struct net_device *dev)
+{
+	rtnl_lock();
+	netif_device_detach(dev);
+	mlx4_en_close(dev);
+	rtnl_unlock();
+}
+
+static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
+			     struct mlx4_en_priv *src,
+			     struct mlx4_en_port_profile *prof)
+{
+	memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
+	       sizeof(dst->hwtstamp_config));
+	dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
+	dst->tx_ring_num = prof->tx_ring_num;
+	dst->rx_ring_num = prof->rx_ring_num;
+	dst->flags = prof->flags;
+	dst->mdev = src->mdev;
+	dst->port = src->port;
+	dst->dev = src->dev;
+	dst->prof = prof;
+	dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+					 DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+
+	dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
+				GFP_KERNEL);
+	if (!dst->tx_ring)
+		return -ENOMEM;
+
+	dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
+			      GFP_KERNEL);
+	if (!dst->tx_cq) {
+		kfree(dst->tx_ring);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
+				struct mlx4_en_priv *src)
+{
+	memcpy(dst->rx_ring, src->rx_ring,
+	       sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
+	memcpy(dst->rx_cq, src->rx_cq,
+	       sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
+	memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
+	       sizeof(dst->hwtstamp_config));
+	dst->tx_ring_num = src->tx_ring_num;
+	dst->rx_ring_num = src->rx_ring_num;
+	dst->tx_ring = src->tx_ring;
+	dst->tx_cq = src->tx_cq;
+	memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
+}
+
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+				struct mlx4_en_priv *tmp,
+				struct mlx4_en_port_profile *prof)
+{
+	mlx4_en_copy_priv(tmp, priv, prof);
+
+	if (mlx4_en_alloc_resources(tmp)) {
+		en_warn(priv,
+			"%s: Resource allocation failed, using previous configuration\n",
+			__func__);
+		kfree(tmp->tx_ring);
+		kfree(tmp->tx_cq);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+				    struct mlx4_en_priv *tmp)
+{
+	mlx4_en_free_resources(priv);
+	mlx4_en_update_priv(priv, tmp);
+}
 
 void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	bool shutdown = mdev->dev->persist->interface_state &
+					    MLX4_INTERFACE_STATE_SHUTDOWN;
 
 	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2044,7 +2128,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	if (priv->registered) {
 		devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev,
 							      priv->port));
-		unregister_netdev(dev);
+		if (shutdown)
+			mlx4_en_shutdown(dev);
+		else
+			unregister_netdev(dev);
 	}
 
 	if (priv->allocated)
@@ -2064,12 +2151,17 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	mdev->upper[priv->port] = NULL;
 	mutex_unlock(&mdev->state_lock);
 
+#ifdef CONFIG_RFS_ACCEL
+	mlx4_en_cleanup_filters(priv);
+#endif
+
 	mlx4_en_free_resources(priv);
 
 	kfree(priv->tx_ring);
 	kfree(priv->tx_cq);
 
-	free_netdev(dev);
+	if (!shutdown)
+		free_netdev(dev);
 }
 
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
@@ -2447,9 +2539,14 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 	 * strip that feature if this is an IPv6 encapsulated frame.
 	 */
 	if (skb->encapsulation &&
-	    (skb->ip_summed == CHECKSUM_PARTIAL) &&
-	    (ip_hdr(skb)->version != 4))
-		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	    (skb->ip_summed == CHECKSUM_PARTIAL)) {
+		struct mlx4_en_priv *priv = netdev_priv(dev);
+
+		if (!priv->vxlan_port ||
+		    (ip_hdr(skb)->version != 4) ||
+		    (udp_hdr(skb)->dest != priv->vxlan_port))
+			features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	}
 
 	return features;
 }
@@ -3102,6 +3199,8 @@ int mlx4_en_reset_config(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
+	struct mlx4_en_priv *tmp;
 	int port_up = 0;
 	int err = 0;
 
@@ -3118,19 +3217,29 @@ int mlx4_en_reset_config(struct net_device *dev,
 		return -EINVAL;
 	}
 
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	mutex_lock(&mdev->state_lock);
+
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
+
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err)
+		goto out;
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	mlx4_en_free_resources(priv);
-
 	en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-		ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX));
+		ts_config.rx_filter,
+		!!(features & NETIF_F_HW_VLAN_CTAG_RX));
 
-	priv->hwtstamp_config.tx_type = ts_config.tx_type;
-	priv->hwtstamp_config.rx_filter = ts_config.rx_filter;
+	mlx4_en_safe_replace_resources(priv, tmp);
 
 	if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
 		if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -3164,11 +3273,6 @@ int mlx4_en_reset_config(struct net_device *dev,
 		dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
 	}
 
-	err = mlx4_en_alloc_resources(priv);
-	if (err) {
-		en_err(priv, "Failed reallocating port resources\n");
-		goto out;
-	}
 	if (port_up) {
 		err = mlx4_en_start_port(dev);
 		if (err)
@@ -3177,6 +3281,8 @@ int mlx4_en_reset_config(struct net_device *dev,
 
 out:
 	mutex_unlock(&mdev->state_lock);
-	netdev_features_change(dev);
+	kfree(tmp);
+	if (!err)
+		netdev_features_change(dev);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c1b3a9c8cf3b..99b5407f2278 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -514,9 +514,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 	ring->rx_info = NULL;
 	kfree(ring);
 	*pring = NULL;
-#ifdef CONFIG_RFS_ACCEL
-	mlx4_en_cleanup_filters(priv);
-#endif
 }
 
 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12c77a70abdb..546fab0ecc3b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3222,6 +3222,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 
 	INIT_LIST_HEAD(&priv->pgdir_list);
 	mutex_init(&priv->pgdir_mutex);
+	spin_lock_init(&priv->cmd.context_lock);
 
 	INIT_LIST_HEAD(&priv->bf_list);
 	mutex_init(&priv->bf_mutex);
@@ -4134,8 +4135,11 @@ static void mlx4_shutdown(struct pci_dev *pdev)
 
 	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
 	mutex_lock(&persist->interface_state_mutex);
-	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+	if (persist->interface_state & MLX4_INTERFACE_STATE_UP) {
+		/* Notify mlx4 clients that the kernel is being shut down */
+		persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN;
 		mlx4_unload_one(pdev);
+	}
 	mutex_unlock(&persist->interface_state_mutex);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 467d47ed2c39..13d297ee34bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -353,12 +353,14 @@ struct mlx4_en_port_profile {
 	u32 rx_ring_num;
 	u32 tx_ring_size;
 	u32 rx_ring_size;
+	u8 num_tx_rings_p_up;
 	u8 rx_pause;
 	u8 rx_ppp;
 	u8 tx_pause;
 	u8 tx_ppp;
 	int rss_rings;
 	int inline_thold;
+	struct hwtstamp_config hwtstamp_config;
 };
 
 struct mlx4_en_profile {
@@ -623,8 +625,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 			      u8 rx_ppp, u8 rx_pause,
 			      u8 tx_ppp, u8 tx_pause);
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv);
-int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+				struct mlx4_en_priv *tmp,
+				struct mlx4_en_port_profile *prof);
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+				    struct mlx4_en_priv *tmp);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
 		      int entries, int ring, enum cq_type mode, int node);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index dcd2df6518de..d6e2a1cae19a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
 	case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_RTR2RTS_QP:
 	case MLX5_CMD_OP_RTS2RTS_QP:
 	case MLX5_CMD_OP_SQERR2RTS_QP:
-	case MLX5_CMD_OP_2ERR_QP:
-	case MLX5_CMD_OP_2RST_QP:
 	case MLX5_CMD_OP_QUERY_QP:
 	case MLX5_CMD_OP_SQD_RTS_QP:
 	case MLX5_CMD_OP_INIT2INIT_QP:
@@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
 	case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
-	case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
 	case MLX5_CMD_OP_SET_ROCE_ADDRESS:
 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_CREATE_RQT:
 	case MLX5_CMD_OP_MODIFY_RQT:
 	case MLX5_CMD_OP_QUERY_RQT:
+
 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
@@ -545,6 +549,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
 	default: return "unknown command opcode";
 	}
 }
@@ -601,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev,
 		pr_debug("\n");
 }
 
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
+
+	return be16_to_cpu(hdr->opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+	struct delayed_work *dwork = container_of(work, struct delayed_work,
+						  work);
+	struct mlx5_cmd_work_ent *ent = container_of(dwork,
+						     struct mlx5_cmd_work_ent,
+						     cb_timeout_work);
+	struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+						 cmd);
+
+	ent->ret = -ETIMEDOUT;
+	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+		       mlx5_command_str(msg_to_opcode(ent->in)),
+		       msg_to_opcode(ent->in));
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
 	struct mlx5_cmd *cmd = ent->cmd;
 	struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+	unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
 	struct mlx5_cmd_layout *lay;
 	struct semaphore *sem;
 	unsigned long flags;
@@ -646,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work)
 	dump_command(dev, ent, 1);
 	ent->ts1 = ktime_get_ns();
 
+	if (ent->callback)
+		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -690,13 +723,6 @@ static const char *deliv_status_to_str(u8 status)
 	}
 }
 
-static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-{
-	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-	return be16_to_cpu(hdr->opcode);
-}
-
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
 	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -705,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 
 	if (cmd->mode == CMD_MODE_POLLING) {
 		wait_for_completion(&ent->done);
-		err = ent->ret;
-	} else {
-		if (!wait_for_completion_timeout(&ent->done, timeout))
-			err = -ETIMEDOUT;
-		else
-			err = 0;
+	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+		ent->ret = -ETIMEDOUT;
+		mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
 	}
+
+	err = ent->ret;
+
 	if (err == -ETIMEDOUT) {
 		mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
 			       mlx5_command_str(msg_to_opcode(ent->in)),
@@ -760,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	if (!callback)
 		init_completion(&ent->done);
 
+	INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
 	INIT_WORK(&ent->work, cmd_work_handler);
 	if (page_queue) {
 		cmd_work_handler(&ent->work);
@@ -769,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 		goto out_free;
 	}
 
-	if (!callback) {
-		err = wait_func(dev, ent);
-		if (err == -ETIMEDOUT)
-			goto out;
-
-		ds = ent->ts2 - ent->ts1;
-		op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
-		if (op < ARRAY_SIZE(cmd->stats)) {
-			stats = &cmd->stats[op];
-			spin_lock_irq(&stats->lock);
-			stats->sum += ds;
-			++stats->n;
-			spin_unlock_irq(&stats->lock);
-		}
-		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
-				   "fw exec time for %s is %lld nsec\n",
-				   mlx5_command_str(op), ds);
-		*status = ent->status;
-		free_cmd(ent);
-	}
+	if (callback)
+		goto out;
 
-	return err;
+	err = wait_func(dev, ent);
+	if (err == -ETIMEDOUT)
+		goto out_free;
+
+	ds = ent->ts2 - ent->ts1;
+	op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+	if (op < ARRAY_SIZE(cmd->stats)) {
+		stats = &cmd->stats[op];
+		spin_lock_irq(&stats->lock);
+		stats->sum += ds;
+		++stats->n;
+		spin_unlock_irq(&stats->lock);
+	}
+	mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+			   "fw exec time for %s is %lld nsec\n",
+			   mlx5_command_str(op), ds);
+	*status = ent->status;
 
 out_free:
 	free_cmd(ent);
@@ -1180,41 +1205,30 @@ err_dbg:
 	return err;
 }
 
-void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int i;
 
 	for (i = 0; i < cmd->max_reg_cmds; i++)
 		down(&cmd->sem);
-
 	down(&cmd->pages_sem);
 
-	flush_workqueue(cmd->wq);
-
-	cmd->mode = CMD_MODE_EVENTS;
+	cmd->mode = mode;
 
 	up(&cmd->pages_sem);
 	for (i = 0; i < cmd->max_reg_cmds; i++)
 		up(&cmd->sem);
 }
 
-void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd *cmd = &dev->cmd;
-	int i;
-
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		down(&cmd->sem);
-
-	down(&cmd->pages_sem);
-
-	flush_workqueue(cmd->wq);
-	cmd->mode = CMD_MODE_POLLING;
+	mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
 
-	up(&cmd->pages_sem);
-	for (i = 0; i < cmd->max_reg_cmds; i++)
-		up(&cmd->sem);
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+	mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
@@ -1250,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
 			struct semaphore *sem;
 
 			ent = cmd->ent_arr[i];
+			if (ent->callback)
+				cancel_delayed_work(&ent->cb_timeout_work);
 			if (ent->page_queue)
 				sem = &cmd->pages_sem;
 			else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e8a6c3325b39..943b1bd434bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -145,7 +145,6 @@ struct mlx5e_umr_wqe {
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
 #endif
 
 struct mlx5e_params {
@@ -191,6 +190,7 @@ struct mlx5e_tstamp {
 enum {
 	MLX5E_RQ_STATE_POST_WQES_ENABLE,
 	MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
+	MLX5E_RQ_STATE_FLUSH_TIMEOUT,
 };
 
 struct mlx5e_cq {
@@ -220,6 +220,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
 typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
 				  u16 ix);
 
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix);
+
 struct mlx5e_dma_info {
 	struct page	*page;
 	dma_addr_t	addr;
@@ -241,6 +243,7 @@ struct mlx5e_rq {
 	struct mlx5e_cq        cq;
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 	mlx5e_fp_alloc_wqe     alloc_wqe;
+	mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
 	unsigned long          state;
 	int                    ix;
@@ -305,6 +308,7 @@ struct mlx5e_sq_dma {
 enum {
 	MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
 	MLX5E_SQ_STATE_BF_ENABLE,
+	MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -401,7 +405,7 @@ enum mlx5e_traffic_types {
 };
 
 enum {
-	MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+	MLX5E_STATE_ASYNC_EVENTS_ENABLED,
 	MLX5E_STATE_OPENED,
 	MLX5E_STATE_DESTROYING,
 };
@@ -538,6 +542,7 @@ struct mlx5e_priv {
 	struct workqueue_struct    *wq;
 	struct work_struct         update_carrier_work;
 	struct work_struct         set_rx_mode_work;
+	struct work_struct         tx_timeout_work;
 	struct delayed_work        update_stats_work;
 
 	struct mlx5_core_dev      *mdev;
@@ -589,12 +594,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
 				    struct mlx5_cqe64 *cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index b2db180ae2a5..c585349e05c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
 			tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
 			break;
 		case IEEE_8021QAZ_TSA_ETS:
-			tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC;
+			tc_tx_bw[i] = ets->tc_tx_bw[i];
 			break;
 		}
 	}
@@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
 
 	/* Validate Bandwidth Sum */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+			if (!ets->tc_tx_bw[i])
+				return -EINVAL;
+
 			bw_sum += ets->tc_tx_bw[i];
+		}
 	}
 
 	if (bw_sum != 0 && bw_sum != 100)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index fc7dcc03b1de..e667a870e0c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
 #define MLX5E_NUM_SQ_STATS(priv) \
 	(NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
 	 test_bit(MLX5E_STATE_OPENED, &priv->state))
-#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv))
+#define MLX5E_NUM_PFC_COUNTERS(priv) \
+	(hweight8(mlx5e_query_pfc_combined(priv)) * \
+	 NUM_PPORT_PER_PRIO_PFC_COUNTERS)
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
@@ -211,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 
 	/* SW counters */
 	for (i = 0; i < NUM_SW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
 
 	/* Q counters */
 	for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
 
 	/* VPORT counters */
 	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       vport_stats_desc[i].name);
+		       vport_stats_desc[i].format);
 
 	/* PPORT counters */
 	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_802_3_stats_desc[i].name);
+		       pport_802_3_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2863_stats_desc[i].name);
+		       pport_2863_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2819_stats_desc[i].name);
+		       pport_2819_stats_desc[i].format);
 
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio,
-				pport_per_prio_traffic_stats_desc[i].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_traffic_stats_desc[i].format, prio);
 	}
 
 	pfc_combined = mlx5e_query_pfc_combined(priv);
 	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio, pport_per_prio_pfc_stats_desc[i].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, prio);
 		}
 	}
 
@@ -256,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 	/* per channel counters */
 	for (i = 0; i < priv->params.num_channels; i++)
 		for (j = 0; j < NUM_RQ_STATS; j++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i,
-				rq_stats_desc[j].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				rq_stats_desc[j].format, i);
 
 	for (tc = 0; tc < priv->params.num_tc; tc++)
 		for (i = 0; i < priv->params.num_channels; i++)
 			for (j = 0; j < NUM_SQ_STATS; j++)
 				sprintf(data + (idx++) * ETH_GSTRING_LEN,
-					"tx%d_%s",
-					priv->channeltc_to_txq_map[i][tc],
-					sq_stats_desc[j].name);
+					sq_stats_desc[j].format,
+					priv->channeltc_to_txq_map[i][tc]);
 }
 
 static void mlx5e_get_strings(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fd4392999eee..5a4d88c2cdb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,13 @@
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+	MLX5_EN_QP_FLUSH_TIMEOUT_MS	= 5000,
+	MLX5_EN_QP_FLUSH_MSLEEP_QUANT	= 20,
+	MLX5_EN_QP_FLUSH_MAX_ITER	= MLX5_EN_QP_FLUSH_TIMEOUT_MS /
+					  MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
+};
+
 struct mlx5e_rq_param {
 	u32                        rqc[MLX5_ST_SZ_DW(rqc)];
 	struct mlx5_wq_param       wq;
@@ -74,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 	port_state = mlx5_query_vport_state(mdev,
 		MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
 
-	if (port_state == VPORT_STATE_UP)
+	if (port_state == VPORT_STATE_UP) {
+		netdev_info(priv->netdev, "Link up\n");
 		netif_carrier_on(priv->netdev);
-	else
+	} else {
+		netdev_info(priv->netdev, "Link down\n");
 		netif_carrier_off(priv->netdev);
+	}
 }
 
 static void mlx5e_update_carrier_work(struct work_struct *work)
@@ -91,6 +101,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
 	mutex_unlock(&priv->state_lock);
 }
 
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+					       tx_timeout_work);
+	int err;
+
+	rtnl_lock();
+	mutex_lock(&priv->state_lock);
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+	mlx5e_close_locked(priv->netdev);
+	err = mlx5e_open_locked(priv->netdev);
+	if (err)
+		netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+			   err);
+unlock:
+	mutex_unlock(&priv->state_lock);
+	rtnl_unlock();
+}
+
 static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -105,11 +135,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 		s->rx_packets	+= rq_stats->packets;
 		s->rx_bytes	+= rq_stats->bytes;
-		s->lro_packets	+= rq_stats->lro_packets;
-		s->lro_bytes	+= rq_stats->lro_bytes;
+		s->rx_lro_packets += rq_stats->lro_packets;
+		s->rx_lro_bytes	+= rq_stats->lro_bytes;
 		s->rx_csum_none	+= rq_stats->csum_none;
-		s->rx_csum_sw	+= rq_stats->csum_sw;
-		s->rx_csum_inner += rq_stats->csum_inner;
+		s->rx_csum_complete += rq_stats->csum_complete;
+		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
 		s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
@@ -122,24 +152,23 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 			s->tx_packets		+= sq_stats->packets;
 			s->tx_bytes		+= sq_stats->bytes;
-			s->tso_packets		+= sq_stats->tso_packets;
-			s->tso_bytes		+= sq_stats->tso_bytes;
-			s->tso_inner_packets	+= sq_stats->tso_inner_packets;
-			s->tso_inner_bytes	+= sq_stats->tso_inner_bytes;
+			s->tx_tso_packets	+= sq_stats->tso_packets;
+			s->tx_tso_bytes		+= sq_stats->tso_bytes;
+			s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
+			s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
-			s->tx_csum_inner	+= sq_stats->csum_offload_inner;
-			tx_offload_none		+= sq_stats->csum_offload_none;
+			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+			tx_offload_none		+= sq_stats->csum_none;
 		}
 	}
 
 	/* Update calculated offload counters */
-	s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
-	s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
-			     s->rx_csum_sw;
+	s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner;
+	s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete;
 
-	s->link_down_events = MLX5_GET(ppcnt_reg,
+	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
 				priv->stats.pport.phy_counters,
 				counter_set.phys_layer_cntrs.link_down_events);
 }
@@ -244,7 +273,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 {
 	struct mlx5e_priv *priv = vpriv;
 
-	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
 		return;
 
 	switch (event) {
@@ -260,12 +289,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 	synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
 }
 
@@ -306,6 +335,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		}
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
 		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
 		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
@@ -321,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		}
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
 		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
 		rq->wqe_sz = (priv->params.lro_en) ?
 				priv->params.lro_wqe_sz :
@@ -526,17 +557,25 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
+	int tout = 0;
+	int err;
+
 	clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
 	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 
-	mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-	while (!mlx5_wq_ll_is_empty(&rq->wq))
-		msleep(20);
+	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+	while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
+	       tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
+		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+
+	if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
+		set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
 
 	/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
 	napi_synchronize(&rq->channel->napi);
 
 	mlx5e_disable_rq(rq);
+	mlx5e_free_rx_descs(rq);
 	mlx5e_destroy_rq(rq);
 }
 
@@ -580,7 +619,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
-	err = mlx5_alloc_map_uar(mdev, &sq->uar, true);
+	err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
 	if (err)
 		return err;
 
@@ -783,6 +822,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
+	int tout = 0;
+	int err;
+
 	if (sq->txq) {
 		clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
 		/* prevent netif_tx_wake_queue */
@@ -793,15 +835,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
 		if (mlx5e_sq_has_room_for(sq, 1))
 			mlx5e_send_nop(sq, true);
 
-		mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+		err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
+				      MLX5_SQC_STATE_ERR);
+		if (err)
+			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
 	}
 
-	while (sq->cc != sq->pc) /* wait till sq is empty */
-		msleep(20);
+	/* wait till sq is empty, unless a TX timeout occurred on this SQ */
+	while (sq->cc != sq->pc &&
+	       !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
+		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
+		if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
+			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+	}
 
 	/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
 	napi_synchronize(&sq->channel->napi);
 
+	mlx5e_free_tx_descs(sq);
 	mlx5e_disable_sq(sq);
 	mlx5e_destroy_sq(sq);
 }
@@ -1297,6 +1348,11 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv)
 			goto err_close_channels;
 	}
 
+	/* FIXME: This is a W/A for tx timeout watch dog false alarm when
+	 * polling for inactive tx queues.
+	 */
+	netif_tx_start_all_queues(priv->netdev);
+
 	kfree(cparam);
 	return 0;
 
@@ -1316,6 +1372,12 @@ static void mlx5e_close_channels(struct mlx5e_priv *priv)
 {
 	int i;
 
+	/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
+	 * polling for inactive tx queues.
+	 */
+	netif_tx_stop_all_queues(priv->netdev);
+	netif_tx_disable(priv->netdev);
+
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_close_channel(priv->channel[i]);
 
@@ -1659,8 +1721,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 
 	netdev_set_num_tc(netdev, ntc);
 
+	/* Map netdev TCs to offset 0
+	 * We have our own UP to TXQ mapping for QoS
+	 */
 	for (tc = 0; tc < ntc; tc++)
-		netdev_set_tc_queue(netdev, tc, nch, tc * nch);
+		netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
 int mlx5e_open_locked(struct net_device *netdev)
@@ -2591,6 +2656,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	return features;
 }
 
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	bool sched_work = false;
+	int i;
+
+	netdev_err(dev, "TX timeout detected\n");
+
+	for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) {
+		struct mlx5e_sq *sq = priv->txq_to_sq_map[i];
+
+		if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
+			continue;
+		sched_work = true;
+		set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
+			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
+	}
+
+	if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state))
+		schedule_work(&priv->tx_timeout_work);
+}
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -2608,6 +2696,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
+	.ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2637,6 +2726,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
+	.ndo_tx_timeout          = mlx5e_tx_timeout,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2839,6 +2929,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 
 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
 	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
 }
 
@@ -3192,10 +3283,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
 	flush_workqueue(priv->wq);
 	if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
 		netif_device_detach(netdev);
-		mutex_lock(&priv->state_lock);
-		if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-			mlx5e_close_locked(netdev);
-		mutex_unlock(&priv->state_lock);
+		mlx5e_close(netdev);
 	} else {
 		unregister_netdev(netdev);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index bd947704b59c..9f2a16a507e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -212,6 +212,20 @@ err_free_skb:
 	return -ENOMEM;
 }
 
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+	struct sk_buff *skb = rq->skb[ix];
+
+	if (skb) {
+		rq->skb[ix] = NULL;
+		dma_unmap_single(rq->pdev,
+				 *((dma_addr_t *)skb->cb),
+				 rq->wqe_sz,
+				 DMA_FROM_DEVICE);
+		dev_kfree_skb(skb);
+	}
+}
+
 static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
 {
 	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
@@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 	return 0;
 }
 
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+
+	wi->free_wqe(rq, wi);
+}
+
+void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+	struct mlx5_wq_ll *wq = &rq->wq;
+	struct mlx5e_rx_wqe *wqe;
+	__be16 wqe_ix_be;
+	u16 wqe_ix;
+
+	while (!mlx5_wq_ll_is_empty(wq)) {
+		wqe_ix_be = *wq->tail_next;
+		wqe_ix    = be16_to_cpu(wqe_ix_be);
+		wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+		rq->dealloc_wqe(rq, wqe_ix);
+		mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+			       &wqe->next.next_wqe_index);
+	}
+}
+
 #define RQ_CANNOT_POST(rq) \
 		(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
 		 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
@@ -689,7 +727,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 	if (is_first_ethertype_ip(skb)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
-		rq->stats.csum_sw++;
+		rq->stats.csum_complete++;
 		return;
 	}
 
@@ -699,7 +737,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		if (cqe_is_tunneled(cqe)) {
 			skb->csum_level = 1;
 			skb->encapsulation = 1;
-			rq->stats.csum_inner++;
+			rq->stats.csum_unnecessary_inner++;
 		}
 		return;
 	}
@@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
 	int work_done = 0;
 
+	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+		return 0;
+
 	if (cq->decmprs_left)
 		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 83bc32b25849..fcd490cc5610 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -42,9 +42,11 @@
 	be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
 
 struct counter_desc {
-	char		name[ETH_GSTRING_LEN];
+	char		format[ETH_GSTRING_LEN];
 	int		offset; /* Byte offset */
 };
 
@@ -53,18 +55,18 @@ struct mlx5e_sw_stats {
 	u64 rx_bytes;
 	u64 tx_packets;
 	u64 tx_bytes;
-	u64 tso_packets;
-	u64 tso_bytes;
-	u64 tso_inner_packets;
-	u64 tso_inner_bytes;
-	u64 lro_packets;
-	u64 lro_bytes;
-	u64 rx_csum_good;
+	u64 tx_tso_packets;
+	u64 tx_tso_bytes;
+	u64 tx_tso_inner_packets;
+	u64 tx_tso_inner_bytes;
+	u64 rx_lro_packets;
+	u64 rx_lro_bytes;
+	u64 rx_csum_unnecessary;
 	u64 rx_csum_none;
-	u64 rx_csum_sw;
-	u64 rx_csum_inner;
-	u64 tx_csum_offload;
-	u64 tx_csum_inner;
+	u64 rx_csum_complete;
+	u64 rx_csum_unnecessary_inner;
+	u64 tx_csum_partial;
+	u64 tx_csum_partial_inner;
 	u64 tx_queue_stopped;
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
@@ -76,7 +78,7 @@ struct mlx5e_sw_stats {
 	u64 rx_cqe_compress_pkts;
 
 	/* Special handling counters */
-	u64 link_down_events;
+	u64 link_down_events_phy;
 };
 
 static const struct counter_desc sw_stats_desc[] = {
@@ -84,18 +86,18 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
@@ -105,7 +107,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
 struct mlx5e_qcounter_stats {
@@ -125,12 +127,6 @@ struct mlx5e_vport_stats {
 };
 
 static const struct counter_desc vport_stats_desc[] = {
-	{ "rx_vport_error_packets",
-		VPORT_COUNTER_OFF(received_errors.packets) },
-	{ "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) },
-	{ "tx_vport_error_packets",
-		VPORT_COUNTER_OFF(transmit_errors.packets) },
-	{ "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) },
 	{ "rx_vport_unicast_packets",
 		VPORT_COUNTER_OFF(received_eth_unicast.packets) },
 	{ "rx_vport_unicast_bytes",
@@ -192,94 +188,68 @@ struct mlx5e_pport_stats {
 };
 
 static const struct counter_desc pport_802_3_stats_desc[] = {
-	{ "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) },
-	{ "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) },
-	{ "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
-	{ "alignment_err", PPORT_802_3_OFF(a_alignment_errors) },
-	{ "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) },
-	{ "octets_received", PPORT_802_3_OFF(a_octets_received_ok) },
-	{ "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
-	{ "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
-	{ "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
-	{ "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
-	{ "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) },
-	{ "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) },
-	{ "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) },
-	{ "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
-	{ "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
-	{ "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) },
-	{ "unsupported_op_rx",
-		PPORT_802_3_OFF(a_unsupported_opcodes_received) },
-	{ "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
-	{ "pause_ctrl_tx",
-		PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+	{ "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+	{ "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+	{ "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+	{ "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+	{ "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+	{ "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+	{ "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+	{ "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+	{ "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+	{ "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+	{ "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+	{ "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+	{ "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+	{ "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+	{ "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+	{ "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+	{ "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+	{ "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
 };
 
 static const struct counter_desc pport_2863_stats_desc[] = {
-	{ "in_octets", PPORT_2863_OFF(if_in_octets) },
-	{ "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) },
-	{ "in_discards", PPORT_2863_OFF(if_in_discards) },
-	{ "in_errors", PPORT_2863_OFF(if_in_errors) },
-	{ "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) },
-	{ "out_octets", PPORT_2863_OFF(if_out_octets) },
-	{ "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) },
-	{ "out_discards", PPORT_2863_OFF(if_out_discards) },
-	{ "out_errors", PPORT_2863_OFF(if_out_errors) },
-	{ "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) },
-	{ "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) },
-	{ "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) },
-	{ "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) },
+	{ "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+	{ "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+	{ "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
 };
 
 static const struct counter_desc pport_2819_stats_desc[] = {
-	{ "drop_events", PPORT_2819_OFF(ether_stats_drop_events) },
-	{ "octets", PPORT_2819_OFF(ether_stats_octets) },
-	{ "pkts", PPORT_2819_OFF(ether_stats_pkts) },
-	{ "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) },
-	{ "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) },
-	{ "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) },
-	{ "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) },
-	{ "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) },
-	{ "fragments", PPORT_2819_OFF(ether_stats_fragments) },
-	{ "jabbers", PPORT_2819_OFF(ether_stats_jabbers) },
-	{ "collisions", PPORT_2819_OFF(ether_stats_collisions) },
-	{ "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) },
-	{ "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
-	{ "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
-	{ "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
-	{ "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
-	{ "p1024to1518octets",
-		PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
-	{ "p1519to2047octets",
-		PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
-	{ "p2048to4095octets",
-		PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
-	{ "p4096to8191octets",
-		PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
-	{ "p8192to10239octets",
-		PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+	{ "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+	{ "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+	{ "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+	{ "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+	{ "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+	{ "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+	{ "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+	{ "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+	{ "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+	{ "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+	{ "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+	{ "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+	{ "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
 };
 
 static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
-	{ "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) },
-	{ "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) },
-	{ "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) },
-	{ "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) },
+	{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+	{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+	{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+	{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
 };
 
 static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
-	{ "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) },
-	{ "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
-	{ "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) },
-	{ "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
-	{ "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+	{ "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+	{ "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+	{ "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+	{ "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+	{ "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
-	u64 csum_sw;
-	u64 csum_inner;
+	u64 csum_complete;
+	u64 csum_unnecessary_inner;
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
@@ -292,19 +262,19 @@ struct mlx5e_rq_stats {
 };
 
 static const struct counter_desc rq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };
 
 struct mlx5e_sq_stats {
@@ -315,28 +285,28 @@ struct mlx5e_sq_stats {
 	u64 tso_bytes;
 	u64 tso_inner_packets;
 	u64 tso_inner_bytes;
-	u64 csum_offload_inner;
+	u64 csum_partial_inner;
 	u64 nop;
 	/* less likely accessed in data path */
-	u64 csum_offload_none;
+	u64 csum_none;
 	u64 stopped;
 	u64 wake;
 	u64 dropped;
 };
 
 static const struct counter_desc sq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 229ab16fb8d3..5740b465ef84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int channel_ix = fallback(dev, skb);
-	int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ?
-		 skb->vlan_tci >> VLAN_PRIO_SHIFT : 0;
+	int up = 0;
+
+	if (!netdev_get_num_tc(dev))
+		return channel_ix;
+
+	if (skb_vlan_tag_present(skb))
+		up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+	/* channel_ix can be larger than num_channels since
+	 * dev->num_real_tx_queues = num_channels * num_tc
+	 */
+	if (channel_ix >= priv->params.num_channels)
+		channel_ix = reciprocal_scale(channel_ix,
+					      priv->params.num_channels);
 
 	return priv->channeltc_to_txq_map[channel_ix][up];
 }
@@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 	 * headers and occur before the data gather.
 	 * Therefore these headers must be copied into the WQE
 	 */
-#define MLX5E_MIN_INLINE ETH_HLEN
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 
 	if (bf) {
 		u16 ihs = skb_headlen(skb);
@@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 			return skb_headlen(skb);
 	}
 
-	return MLX5E_MIN_INLINE;
+	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
 }
 
 static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
@@ -192,12 +204,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		if (skb->encapsulation) {
 			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
 					  MLX5_ETH_WQE_L4_INNER_CSUM;
-			sq->stats.csum_offload_inner++;
+			sq->stats.csum_partial_inner++;
 		} else {
 			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 		}
 	} else
-		sq->stats.csum_offload_none++;
+		sq->stats.csum_none++;
 
 	if (sq->cc != sq->prev_cc) {
 		sq->prev_cc = sq->cc;
@@ -317,7 +329,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 	while ((sq->pc & wq->sz_m1) > sq->edge)
 		mlx5e_send_nop(sq, false);
 
-	sq->bf_budget = bf ? sq->bf_budget - 1 : 0;
+	if (bf)
+		sq->bf_budget--;
 
 	sq->stats.packets++;
 	sq->stats.bytes += num_bytes;
@@ -340,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	return mlx5e_sq_xmit(sq, skb);
 }
 
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+	struct mlx5e_tx_wqe_info *wi;
+	struct sk_buff *skb;
+	u16 ci;
+	int i;
+
+	while (sq->cc != sq->pc) {
+		ci = sq->cc & sq->wq.sz_m1;
+		skb = sq->skb[ci];
+		wi = &sq->wqe_info[ci];
+
+		if (!skb) { /* nop */
+			sq->cc++;
+			continue;
+		}
+
+		for (i = 0; i < wi->num_dma; i++) {
+			struct mlx5e_sq_dma *dma =
+				mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+			mlx5e_tx_dma_unmap(sq->pdev, dma);
+		}
+
+		dev_kfree_skb_any(skb);
+		sq->cc += wi->num_wqebbs;
+	}
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_sq *sq;
@@ -351,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
+	if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+		return false;
+
 	npkts = 0;
 	nbytes = 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b84a6918a700..aebbd6ccb9fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -383,7 +383,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				   0, &dest);
-	if (IS_ERR_OR_NULL(flow_rule)) {
+	if (IS_ERR(flow_rule)) {
 		pr_warn(
 			"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
 			 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
@@ -457,7 +457,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
-	if (IS_ERR_OR_NULL(fdb)) {
+	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
 		goto out;
@@ -474,7 +474,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
 	eth_broadcast_addr(dmac);
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
 		goto out;
@@ -489,7 +489,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	eth_zero_addr(dmac);
 	dmac[0] = 0x01;
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
 		goto out;
@@ -506,7 +506,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
 		goto out;
@@ -529,7 +529,7 @@ out:
 		}
 	}
 
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 	return err;
 }
 
@@ -651,6 +651,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 					esw_fdb_set_vport_rule(esw,
 							       mac,
 							       vport_idx);
+			iter_vaddr->mc_promisc = true;
 			break;
 		case MLX5_ACTION_DEL:
 			if (!iter_vaddr)
@@ -1060,7 +1061,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 		return;
 
 	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR_OR_NULL(acl)) {
+	if (IS_ERR(acl)) {
 		err = PTR_ERR(acl);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
 			 vport->vport, err);
@@ -1075,7 +1076,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
 
 	vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(vlan_grp)) {
+	if (IS_ERR(vlan_grp)) {
 		err = PTR_ERR(vlan_grp);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1086,7 +1087,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
 	drop_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(drop_grp)) {
+	if (IS_ERR(drop_grp)) {
 		err = PTR_ERR(drop_grp);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1097,7 +1098,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	vport->egress.drop_grp = drop_grp;
 	vport->egress.allowed_vlans_grp = vlan_grp;
 out:
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 	if (err && !IS_ERR_OR_NULL(vlan_grp))
 		mlx5_destroy_flow_group(vlan_grp);
 	if (err && !IS_ERR_OR_NULL(acl))
@@ -1174,7 +1175,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 		return;
 
 	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR_OR_NULL(acl)) {
+	if (IS_ERR(acl)) {
 		err = PTR_ERR(acl);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n",
 			 vport->vport, err);
@@ -1192,7 +1193,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1207,7 +1208,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1223,7 +1224,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1236,7 +1237,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1259,7 +1260,7 @@ out:
 			mlx5_destroy_flow_table(vport->ingress.acl);
 	}
 
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 }
 
 static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@ -1363,7 +1364,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) {
+	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
 		pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
 			vport->vport, err);
@@ -1380,7 +1381,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) {
+	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
 		pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
 			vport->vport, err);
@@ -1439,7 +1440,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
 		pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
 			vport->vport, err);
@@ -1457,7 +1458,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->egress.drop_rule)) {
+	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
 		pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
 			vport->vport, err);
@@ -1491,14 +1492,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 
 	/* Sync with current vport context */
 	vport->enabled_events = enable_events;
-	esw_vport_change_handle_locked(vport);
-
 	vport->enabled = true;
 
 	/* only PF is trusted by default */
 	vport->trusted = (vport_num) ? false : true;
-
-	arm_vport_context_events_cmd(esw->dev, vport_num, enable_events);
+	esw_vport_change_handle_locked(vport);
 
 	esw->enabled_vports++;
 	esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
@@ -1728,11 +1726,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
 	(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+	((u8 *)node_guid)[7] = mac[0];
+	((u8 *)node_guid)[6] = mac[1];
+	((u8 *)node_guid)[5] = mac[2];
+	((u8 *)node_guid)[4] = 0xff;
+	((u8 *)node_guid)[3] = 0xfe;
+	((u8 *)node_guid)[2] = mac[3];
+	((u8 *)node_guid)[1] = mac[4];
+	((u8 *)node_guid)[0] = mac[5];
+}
+
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       int vport, u8 mac[ETH_ALEN])
 {
-	int err = 0;
 	struct mlx5_vport *evport;
+	u64 node_guid;
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -1756,11 +1767,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 		return err;
 	}
 
+	node_guid_gen_from_mac(&node_guid, mac);
+	err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
+	if (err)
+		mlx5_core_warn(esw->dev,
+			       "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
+			       vport, err);
+
 	mutex_lock(&esw->state_lock);
 	if (evport->enabled)
 		err = esw_vport_ingress_config(esw, evport);
 	mutex_unlock(&esw->state_lock);
-
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8b5f0b2c0d5c..e912a3d2505e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1292,8 +1292,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 				       ft->id);
 			return err;
 		}
-		root->root_ft = new_root_ft;
 	}
+	root->root_ft = new_root_ft;
 	return 0;
 }
 
@@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev)
 
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 {
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+
 	cleanup_root_ns(dev);
 	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
 	cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
@@ -1828,29 +1831,36 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
 	int err = 0;
 
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return 0;
+
 	err = mlx5_init_fc_stats(dev);
 	if (err)
 		return err;
 
-	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
+	if (MLX5_CAP_GEN(dev, nic_flow_table) &&
+	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
 		err = init_root_ns(dev);
 		if (err)
 			goto err;
 	}
+
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = init_fdb_root_ns(dev);
-		if (err)
-			goto err;
-	}
-	if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
-		err = init_egress_acl_root_ns(dev);
-		if (err)
-			goto err;
-	}
-	if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
-		err = init_ingress_acl_root_ns(dev);
-		if (err)
-			goto err;
+		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
+			err = init_fdb_root_ns(dev);
+			if (err)
+				goto err;
+		}
+		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+			err = init_egress_acl_root_ns(dev);
+			if (err)
+				goto err;
+		}
+		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+			err = init_ingress_acl_root_ns(dev);
+			if (err)
+				goto err;
+		}
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 42d16b9458e4..96a59463ae65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev)
 
 void mlx5_enter_error_state(struct mlx5_core_dev *dev)
 {
+	mutex_lock(&dev->intf_state_mutex);
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-		return;
+		goto unlock;
 
 	mlx5_core_err(dev, "start\n");
-	if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+	if (pci_channel_offline(dev->pdev) || in_fatal(dev)) {
 		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+		trigger_cmd_completions(dev);
+	}
 
 	mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
 	mlx5_core_err(dev, "end\n");
+
+unlock:
+	mutex_unlock(&dev->intf_state_mutex);
 }
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
@@ -245,7 +251,6 @@ static void poll_health(unsigned long data)
 	u32 count;
 
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		trigger_cmd_completions(dev);
 		mod_timer(&health->timer, get_next_poll_jiffies());
 		return;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a19b59348dd6..6695893ddd2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
 	mlx5_pci_err_detected(dev->pdev, 0);
 }
 
-/* wait for the device to show vital signs. For now we check
- * that we can read the device ID and that the health buffer
- * shows a non zero value which is different than 0xffffffff
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
  */
-static void wait_vital(struct pci_dev *pdev)
+static int wait_vital(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 	struct mlx5_core_health *health = &dev->priv.health;
 	const int niter = 100;
+	u32 last_count = 0;
 	u32 count;
-	u16 did;
 	int i;
 
-	/* Wait for firmware to be ready after reset */
-	msleep(1000);
-	for (i = 0; i < niter; i++) {
-		if (pci_read_config_word(pdev, 2, &did)) {
-			dev_warn(&pdev->dev, "failed reading config word\n");
-			break;
-		}
-		if (did == pdev->device) {
-			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
-			break;
-		}
-		msleep(50);
-	}
-	if (i == niter)
-		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
-
 	for (i = 0; i < niter; i++) {
 		count = ioread32be(health->health_counter);
 		if (count && count != 0xffffffff) {
-			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
-			break;
+			if (last_count && last_count != count) {
+				dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+				return 0;
+			}
+			last_count = count;
 		}
 		msleep(50);
 	}
 
-	if (i == niter)
-		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+	return -ETIMEDOUT;
 }
 
 static void mlx5_pci_resume(struct pci_dev *pdev)
@@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 
 	pci_save_state(pdev);
-	wait_vital(pdev);
+	err = wait_vital(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+		return;
+	}
 
 	err = mlx5_load_one(dev, priv);
 	if (err)
@@ -1508,8 +1497,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
 	{ PCI_VDEVICE(MELLANOX, 0x1015) },			/* ConnectX-4LX */
 	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
-	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5 */
+	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5, PCIe 4.0 */
 	{ 0, }
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 9eeee0545f1c..32dea3524cee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -345,7 +345,6 @@ retry:
 			       func_id, npages, err);
 		goto out_4k;
 	}
-	dev->priv.fw_pages += npages;
 
 	err = mlx5_cmd_status_to_err(&out.hdr);
 	if (err) {
@@ -373,6 +372,33 @@ out_free:
 	return err;
 }
 
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+			     struct mlx5_manage_pages_inbox *in, int in_size,
+			     struct mlx5_manage_pages_outbox *out, int out_size)
+{
+	struct fw_page *fwp;
+	struct rb_node *p;
+	u32 npages;
+	u32 i = 0;
+
+	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+		return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
+						  (u32 *)out, out_size);
+
+	npages = be32_to_cpu(in->num_entries);
+
+	p = rb_first(&dev->priv.page_root);
+	while (p && i < npages) {
+		fwp = rb_entry(p, struct fw_page, rb_node);
+		out->pas[i] = cpu_to_be64(fwp->addr);
+		p = rb_next(p);
+		i++;
+	}
+
+	out->num_entries = cpu_to_be32(i);
+	return 0;
+}
+
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 			 int *nclaimed)
 {
@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	in.func_id = cpu_to_be16(func_id);
 	in.num_entries = cpu_to_be32(npages);
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
+	err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
 	if (err) {
-		mlx5_core_err(dev, "failed reclaiming pages\n");
-		goto out_free;
-	}
-	dev->priv.fw_pages -= npages;
-
-	if (out->hdr.status) {
-		err = mlx5_cmd_status_to_err(&out->hdr);
+		mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
 		goto out_free;
 	}
 
@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		err = -EINVAL;
 		goto out_free;
 	}
-	if (nclaimed)
-		*nclaimed = num_claimed;
 
 	for (i = 0; i < num_claimed; i++) {
 		addr = be64_to_cpu(out->pas[i]);
 		free_4k(dev, addr);
 	}
+
+	if (nclaimed)
+		*nclaimed = num_claimed;
+
 	dev->priv.fw_pages -= num_claimed;
 	if (func_id)
 		dev->priv.vfs_pages -= num_claimed;
@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 		p = rb_first(&dev->priv.page_root);
 		if (p) {
 			fwp = rb_entry(p, struct fw_page, rb_node);
-			if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-				free_4k(dev, fwp->addr);
-				nclaimed = 1;
-			} else {
-				err = reclaim_pages(dev, fwp->func_id,
-						    optimal_reclaimed_pages(),
-						    &nclaimed);
-			}
+			err = reclaim_pages(dev, fwp->func_id,
+					    optimal_reclaimed_pages(),
+					    &nclaimed);
+
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
 					       err);
@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 		}
 	} while (p);
 
+	WARN(dev->priv.fw_pages,
+	     "FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.fw_pages);
+	WARN(dev->priv.vfs_pages,
+	     "VFs FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.vfs_pages);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index b720a274220d..b82d65802d96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
 	if (out.hdr.status)
 		err = mlx5_cmd_status_to_err(&out.hdr);
 	else
-		*xrcdn = be32_to_cpu(out.xrcdn);
+		*xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff;
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b69dadcfb897..91846dfcbe9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -508,6 +508,41 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
 
+int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+				    u32 vport, u64 node_guid)
+{
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *nic_vport_context;
+	void *in;
+	int err;
+
+	if (!vport)
+		return -EINVAL;
+	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+		return -EACCES;
+	if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
+		return -ENOTSUPP;
+
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.node_guid, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+
+	nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
+					 in, nic_vport_context);
+	MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+
+	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
 int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
 					u16 *qkey_viol_cntr)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index f2fd1ef16da7..e25a73ed2981 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
 	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
 
 	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
@@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 	struct mlx5e_vxlan *vxlan;
 	int err;
 
+	if (mlx5e_vxlan_lookup_port(priv, port))
+		goto free_work;
+
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		goto free_work;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ce21ee5b2357..821a087c7ae2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf);
+	err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1977e7a5c530..57d48da709fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2718,7 +2718,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
  * Configures the switch priority to buffer table.
  */
 #define MLXSW_REG_PPTB_ID 0x500B
-#define MLXSW_REG_PPTB_LEN 0x0C
+#define MLXSW_REG_PPTB_LEN 0x10
 
 static const struct mlxsw_reg_info mlxsw_reg_pptb = {
 	.id = MLXSW_REG_PPTB_ID,
@@ -2784,6 +2784,13 @@ MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8);
  */
 MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4);
 
+/* reg_pptb_prio_to_buff_msb
+ * Mapping of switch priority <i+8> to one of the allocated receive port
+ * buffers.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4);
+
 #define MLXSW_REG_PPTB_ALL_PRIO 0xFF
 
 static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
@@ -2792,6 +2799,14 @@ static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
 	mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM);
 	mlxsw_reg_pptb_local_port_set(payload, local_port);
 	mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+	mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+}
+
+static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio,
+						    u8 buff)
+{
+	mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff);
+	mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff);
 }
 
 /* PBMC - Port Buffer Management Control Register
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4a7273771028..374080027b2f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -171,23 +171,6 @@ static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl);
 }
 
-static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port,
-					 bool *p_is_up)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char paos_pl[MLXSW_REG_PAOS_LEN];
-	u8 oper_status;
-	int err;
-
-	mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(paos), paos_pl);
-	if (err)
-		return err;
-	oper_status = mlxsw_reg_paos_oper_status_get(paos_pl);
-	*p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false;
-	return 0;
-}
-
 static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				      unsigned char *addr)
 {
@@ -247,15 +230,23 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
 }
 
-static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid)
+static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				    u8 swid)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char pspa_pl[MLXSW_REG_PSPA_LEN];
 
-	mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port);
+	mlxsw_reg_pspa_pack(pspa_pl, swid, local_port);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl);
 }
 
+static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+	return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port,
+					swid);
+}
+
 static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				     bool enable)
 {
@@ -305,9 +296,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-					   u8 local_port, u8 *p_module,
-					   u8 *p_width, u8 *p_lane)
+static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
+					 u8 local_port, u8 *p_module,
+					 u8 *p_width, u8 *p_lane)
 {
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
 	int err;
@@ -322,16 +313,6 @@ static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-					 u8 local_port, u8 *p_module,
-					 u8 *p_width)
-{
-	u8 lane;
-
-	return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module,
-					       p_width, &lane);
-}
-
 static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				    u8 module, u8 width, u8 lane)
 {
@@ -410,7 +391,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	}
 
 	mlxsw_sp_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
+
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
@@ -949,17 +934,11 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name,
 					    size_t len)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	u8 module, width, lane;
+	u8 module = mlxsw_sp_port->mapping.module;
+	u8 width = mlxsw_sp_port->mapping.width;
+	u8 lane = mlxsw_sp_port->mapping.lane;
 	int err;
 
-	err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp,
-					      mlxsw_sp_port->local_port,
-					      &module, &width, &lane);
-	if (err) {
-		netdev_err(dev, "Failed to retrieve module information\n");
-		return err;
-	}
-
 	if (!mlxsw_sp_port->split)
 		err = snprintf(name, len, "p%d", module + 1);
 	else
@@ -1438,7 +1417,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev,
 
 	cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) |
 			 mlxsw_sp_from_ptys_supported_link(eth_proto_cap) |
-			 SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+			 SUPPORTED_Pause | SUPPORTED_Asym_Pause |
+			 SUPPORTED_Autoneg;
 	cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin);
 	mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev),
 					eth_proto_oper, cmd);
@@ -1497,7 +1477,6 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev,
 	u32 eth_proto_new;
 	u32 eth_proto_cap;
 	u32 eth_proto_admin;
-	bool is_up;
 	int err;
 
 	speed = ethtool_cmd_speed(cmd);
@@ -1529,12 +1508,7 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev,
 		return err;
 	}
 
-	err = mlxsw_sp_port_oper_status_get(mlxsw_sp_port, &is_up);
-	if (err) {
-		netdev_err(dev, "Failed to get oper status");
-		return err;
-	}
-	if (!is_up)
+	if (!netif_running(dev))
 		return 0;
 
 	err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
@@ -1681,8 +1655,8 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	return 0;
 }
 
-static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				  bool split, u8 module, u8 width)
+static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				bool split, u8 module, u8 width, u8 lane)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct net_device *dev;
@@ -1697,6 +1671,9 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp_port->local_port = local_port;
 	mlxsw_sp_port->split = split;
+	mlxsw_sp_port->mapping.module = module;
+	mlxsw_sp_port->mapping.width = width;
+	mlxsw_sp_port->mapping.lane = lane;
 	bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE);
 	mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL);
 	if (!mlxsw_sp_port->active_vlans) {
@@ -1839,28 +1816,6 @@ err_port_active_vlans_alloc:
 	return err;
 }
 
-static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				bool split, u8 module, u8 width, u8 lane)
-{
-	int err;
-
-	err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width,
-				       lane);
-	if (err)
-		return err;
-
-	err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module,
-				     width);
-	if (err)
-		goto err_port_create;
-
-	return 0;
-
-err_port_create:
-	mlxsw_sp_port_module_unmap(mlxsw_sp, local_port);
-	return err;
-}
-
 static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	struct net_device *dev = mlxsw_sp_port->dev;
@@ -1909,8 +1864,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 
 static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 {
+	u8 module, width, lane;
 	size_t alloc_size;
-	u8 module, width;
 	int i;
 	int err;
 
@@ -1921,13 +1876,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 
 	for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
 		err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
-						    &width);
+						    &width, &lane);
 		if (err)
 			goto err_port_module_info_get;
 		if (!width)
 			continue;
 		mlxsw_sp->port_to_module[i] = module;
-		err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width);
+		err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width,
+					   lane);
 		if (err)
 			goto err_port_create;
 	}
@@ -1948,12 +1904,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port)
 	return local_port - offset;
 }
 
+static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
+				      u8 module, unsigned int count)
+{
+	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
+	int err, i;
+
+	for (i = 0; i < count; i++) {
+		err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module,
+					       width, i * width);
+		if (err)
+			goto err_port_module_map;
+	}
+
+	for (i = 0; i < count; i++) {
+		err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0);
+		if (err)
+			goto err_port_swid_set;
+	}
+
+	for (i = 0; i < count; i++) {
+		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
+					   module, width, i * width);
+		if (err)
+			goto err_port_create;
+	}
+
+	return 0;
+
+err_port_create:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+	i = count;
+err_port_swid_set:
+	for (i--; i >= 0; i--)
+		__mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i,
+					 MLXSW_PORT_SWID_DISABLED_PORT);
+	i = count;
+err_port_module_map:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i);
+	return err;
+}
+
+static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp,
+					 u8 base_port, unsigned int count)
+{
+	u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH;
+	int i;
+
+	/* Split by four means we need to re-create two ports, otherwise
+	 * only one.
+	 */
+	count = count / 2;
+
+	for (i = 0; i < count; i++) {
+		local_port = base_port + i * 2;
+		module = mlxsw_sp->port_to_module[local_port];
+
+		mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width,
+					 0);
+	}
+
+	for (i = 0; i < count; i++)
+		__mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0);
+
+	for (i = 0; i < count; i++) {
+		local_port = base_port + i * 2;
+		module = mlxsw_sp->port_to_module[local_port];
+
+		mlxsw_sp_port_create(mlxsw_sp, local_port, false, module,
+				     width, 0);
+	}
+}
+
 static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 			       unsigned int count)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
 	u8 module, cur_width, base_port;
 	int i;
 	int err;
@@ -1965,18 +1994,14 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 		return -EINVAL;
 	}
 
+	module = mlxsw_sp_port->mapping.module;
+	cur_width = mlxsw_sp_port->mapping.width;
+
 	if (count != 2 && count != 4) {
 		netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n");
 		return -EINVAL;
 	}
 
-	err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module,
-					    &cur_width);
-	if (err) {
-		netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n");
-		return err;
-	}
-
 	if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) {
 		netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
 		return -EINVAL;
@@ -2001,25 +2026,16 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 	for (i = 0; i < count; i++)
 		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
-	for (i = 0; i < count; i++) {
-		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
-					   module, width, i * width);
-		if (err) {
-			dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n");
-			goto err_port_create;
-		}
+	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
+		goto err_port_split_create;
 	}
 
 	return 0;
 
-err_port_create:
-	for (i--; i >= 0; i--)
-		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
-	for (i = 0; i < count / 2; i++) {
-		module = mlxsw_sp->port_to_module[base_port + i * 2];
-		mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false,
-				     module, MLXSW_PORT_MODULE_MAX_WIDTH, 0);
-	}
+err_port_split_create:
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 	return err;
 }
 
@@ -2027,10 +2043,9 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 module, cur_width, base_port;
+	u8 cur_width, base_port;
 	unsigned int count;
 	int i;
-	int err;
 
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
@@ -2044,12 +2059,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 		return -EINVAL;
 	}
 
-	err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module,
-					    &cur_width);
-	if (err) {
-		netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n");
-		return err;
-	}
+	cur_width = mlxsw_sp_port->mapping.width;
 	count = cur_width == 1 ? 4 : 2;
 
 	base_port = mlxsw_sp_cluster_base_port_get(local_port);
@@ -2061,14 +2071,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 	for (i = 0; i < count; i++)
 		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
-	for (i = 0; i < count / 2; i++) {
-		module = mlxsw_sp->port_to_module[base_port + i * 2];
-		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false,
-					   module, MLXSW_PORT_MODULE_MAX_WIDTH,
-					   0);
-		if (err)
-			dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n");
-	}
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e2c022d3e2f3..13b30eaa13d4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -229,6 +229,11 @@ struct mlxsw_sp_port {
 		struct ieee_maxrate *maxrate;
 		struct ieee_pfc *pfc;
 	} dcb;
+	struct {
+		u8 module;
+		u8 width;
+		u8 lane;
+	} mapping;
 	/* 802.1Q bridge VLANs */
 	unsigned long *active_vlans;
 	unsigned long *untagged_vlans;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index a3720a0fad7d..074cdda7b6f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -194,7 +194,7 @@ static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 	mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0);
+		mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0);
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
 			       pptb_pl);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 0b323661c0b6..01cfb7512827 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -103,7 +103,8 @@ static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]);
+		mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]);
+
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
 			       pptb_pl);
 }
@@ -249,6 +250,7 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
 		return err;
 
 	memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets));
+	mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
 
 	return 0;
 }
@@ -351,7 +353,8 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	int err;
 
-	if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) {
+	if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
+	    pfc->pfc_en) {
 		netdev_err(dev, "PAUSE frames already enabled on port\n");
 		return -EINVAL;
 	}
@@ -371,6 +374,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
 	}
 
 	memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc));
+	mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 3842eab9449a..25f658b3849a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -316,7 +316,10 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
 		}
 	}
 	mlxsw_sx_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 7066954c39d6..0a26b11ca8f6 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
 			enc28j60_phy_read(priv, PHIR);
 		}
 		/* TX complete handler */
-		if ((intflags & EIR_TXIF) != 0) {
+		if (((intflags & EIR_TXIF) != 0) &&
+		    ((intflags & EIR_TXERIF) == 0)) {
 			bool err = false;
 			loop++;
 			if (netif_msg_intr(priv))
@@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
 					enc28j60_tx_clear(ndev, true);
 			} else
 				enc28j60_tx_clear(ndev, true);
-			locked_reg_bfclr(priv, EIR, EIR_TXERIF);
+			locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF);
 		}
 		/* RX Error handler */
 		if ((intflags & EIR_RXERIF) != 0) {
@@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work)
  */
 static void enc28j60_hw_tx(struct enc28j60_net *priv)
 {
+	BUG_ON(!priv->tx_skb);
+
 	if (netif_msg_tx_queued(priv))
 		printk(KERN_DEBUG DRV_NAME
 			": Tx Packet Len:%d\n", priv->tx_skb->len);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index fa47c14c743a..ba26bb356b8d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2015,7 +2015,7 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 
 	netif_tx_wake_all_queues(nn->netdev);
 
-	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	nfp_net_read_link_status(nn);
 }
 
@@ -2044,7 +2044,7 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
 	if (err)
 		goto err_free_exn;
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
 	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
 			       GFP_KERNEL);
@@ -2133,7 +2133,7 @@ static void nfp_net_close_stack(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 9afc15fdbb02..e29ed5a69566 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -3700,6 +3700,7 @@ struct public_port {
 #define MEDIA_DA_TWINAX         0x3
 #define MEDIA_BASE_T            0x4
 #define MEDIA_SFP_1G_FIBER      0x5
+#define MEDIA_MODULE_FIBER      0x6
 #define MEDIA_KR                0xf0
 #define MEDIA_NOT_PRESENT       0xff
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 8fba87dd48af..aada4c7e095f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -72,6 +72,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->mtu			= cpu_to_le16(p_params->mtu);
 	p_ramrod->inner_vlan_removal_en	= p_params->remove_inner_vlan;
 	p_ramrod->drop_ttl0_en		= p_params->drop_ttl0;
+	p_ramrod->untagged		= p_params->only_untagged;
 
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1);
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1);
@@ -247,10 +248,6 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn,
 		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
-		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL,
-			  (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) &&
-			   !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)));
-
 		SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
@@ -1748,7 +1745,8 @@ static int qed_start_vport(struct qed_dev *cdev,
 			   start.vport_id, start.mtu);
 	}
 
-	qed_reset_vport_stats(cdev);
+	if (params->clear_stats)
+		qed_reset_vport_stats(cdev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 753064679bde..c7e01b303540 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1085,6 +1085,7 @@ static int qed_get_port_type(u32 media_type)
 	case MEDIA_SFPP_10G_FIBER:
 	case MEDIA_SFP_1G_FIBER:
 	case MEDIA_XFP_FIBER:
+	case MEDIA_MODULE_FIBER:
 	case MEDIA_KR:
 		port_type = PORT_FIBRE;
 		break;
@@ -1105,6 +1106,39 @@ static int qed_get_port_type(u32 media_type)
 	return port_type;
 }
 
+static int qed_get_link_data(struct qed_hwfn *hwfn,
+			     struct qed_mcp_link_params *params,
+			     struct qed_mcp_link_state *link,
+			     struct qed_mcp_link_capabilities *link_caps)
+{
+	void *p;
+
+	if (!IS_PF(hwfn->cdev)) {
+		qed_vf_get_link_params(hwfn, params);
+		qed_vf_get_link_state(hwfn, link);
+		qed_vf_get_link_caps(hwfn, link_caps);
+
+		return 0;
+	}
+
+	p = qed_mcp_get_link_params(hwfn);
+	if (!p)
+		return -ENXIO;
+	memcpy(params, p, sizeof(*params));
+
+	p = qed_mcp_get_link_state(hwfn);
+	if (!p)
+		return -ENXIO;
+	memcpy(link, p, sizeof(*link));
+
+	p = qed_mcp_get_link_capabilities(hwfn);
+	if (!p)
+		return -ENXIO;
+	memcpy(link_caps, p, sizeof(*link_caps));
+
+	return 0;
+}
+
 static void qed_fill_link(struct qed_hwfn *hwfn,
 			  struct qed_link_output *if_link)
 {
@@ -1116,15 +1150,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 	memset(if_link, 0, sizeof(*if_link));
 
 	/* Prepare source inputs */
-	if (IS_PF(hwfn->cdev)) {
-		memcpy(&params, qed_mcp_get_link_params(hwfn), sizeof(params));
-		memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link));
-		memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn),
-		       sizeof(link_caps));
-	} else {
-		qed_vf_get_link_params(hwfn, &params);
-		qed_vf_get_link_state(hwfn, &link);
-		qed_vf_get_link_caps(hwfn, &link_caps);
+	if (qed_get_link_data(hwfn, &params, &link, &link_caps)) {
+		dev_warn(&hwfn->cdev->pdev->dev, "no link data available\n");
+		return;
 	}
 
 	/* Set the link parameters to pass to protocol driver */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index acac6626a1b2..b122f6013b6c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -213,19 +213,15 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
 	SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
 		  DQ_XCM_CORE_SPQ_PROD_CMD);
 	db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-
-	/* validate producer is up to-date */
-	rmb();
-
 	db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
 
-	/* do not reorder */
-	barrier();
+	/* make sure the SPQE is updated before the doorbell */
+	wmb();
 
 	DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
 
 	/* make sure doorbell is rang */
-	mmiowb();
+	wmb();
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n",
@@ -614,7 +610,9 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 
 			*p_en2 = *p_ent;
 
-			kfree(p_ent);
+			/* EBLOCK responsible to free the allocated p_ent */
+			if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK)
+				kfree(p_ent);
 
 			p_ent = p_en2;
 		}
@@ -749,6 +747,15 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 * Thus, after gaining the answer perform the cleanup here.
 		 */
 		rc = qed_spq_block(p_hwfn, p_ent, fw_return_code);
+
+		if (p_ent->queue == &p_spq->unlimited_pending) {
+			/* This is an allocated p_ent which does not need to
+			 * return to pool.
+			 */
+			kfree(p_ent);
+			return rc;
+		}
+
 		if (rc)
 			goto spq_post_fail2;
 
@@ -844,8 +851,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 		found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
 					fw_return_code);
 
-	if (found->comp_mode != QED_SPQ_MODE_EBLOCK)
-		/* EBLOCK is responsible for freeing its own entry */
+	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
+	    (found->queue == &p_spq->unlimited_pending))
+		/* EBLOCK  is responsible for returning its own entry into the
+		 * free list, unless it originally added the entry into the
+		 * unlimited pending list.
+		 */
 		qed_spq_return_entry(p_hwfn, found);
 
 	/* Attempt to post pending requests */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index c8667c65e685..c90b2b6ad969 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -12,11 +12,13 @@
 #include "qed_vf.h"
 #define QED_VF_ARRAY_LENGTH (3)
 
+#ifdef CONFIG_QED_SRIOV
 #define IS_VF(cdev)             ((cdev)->b_is_vf)
 #define IS_PF(cdev)             (!((cdev)->b_is_vf))
-#ifdef CONFIG_QED_SRIOV
 #define IS_PF_SRIOV(p_hwfn)     (!!((p_hwfn)->cdev->p_iov_info))
 #else
+#define IS_VF(cdev)             (0)
+#define IS_PF(cdev)             (1)
 #define IS_PF_SRIOV(p_hwfn)     (0)
 #endif
 #define IS_PF_SRIOV_ALLOC(p_hwfn)       (!!((p_hwfn)->pf_iov_info))
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 5d00d1404bfc..f8e11f953acb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -87,7 +87,9 @@ static const struct pci_device_id qede_pci_tbl[] = {
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF},
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF},
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF},
+#ifdef CONFIG_QED_SRIOV
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF},
+#endif
 	{ 0 }
 };
 
@@ -3229,7 +3231,7 @@ static int qede_stop_queues(struct qede_dev *edev)
 	return rc;
 }
 
-static int qede_start_queues(struct qede_dev *edev)
+static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
 	int rc, tc, i;
 	int vlan_removal_en = 1;
@@ -3460,6 +3462,7 @@ out:
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
+	QEDE_LOAD_RELOAD,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
@@ -3498,7 +3501,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 		goto err3;
 	DP_INFO(edev, "Setup IRQs succeeded\n");
 
-	rc = qede_start_queues(edev);
+	rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD);
 	if (rc)
 		goto err4;
 	DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
@@ -3553,7 +3556,7 @@ void qede_reload(struct qede_dev *edev,
 	if (func)
 		func(edev, args);
 
-	qede_load(edev, QEDE_LOAD_NORMAL);
+	qede_load(edev, QEDE_LOAD_RELOAD);
 
 	mutex_lock(&edev->qede_lock);
 	qede_config_rx_mode(edev->ndev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 7bd6f25b4625..87c642d3b075 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -772,6 +772,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	tx_ring->tx_stats.tx_bytes += skb->len;
 	tx_ring->tx_stats.xmit_called++;
 
+	/* Ensure writes are complete before HW fetches Tx descriptors */
+	wmb();
 	qlcnic_update_cmd_producer(tx_ring);
 
 	return NETDEV_TX_OK;
@@ -2220,7 +2222,7 @@ void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *sds_ring)
 	if (!opcode)
 		return;
 
-	ring = QLCNIC_FETCH_RING_ID(qlcnic_83xx_hndl(sts_data[0]));
+	ring = QLCNIC_FETCH_RING_ID(sts_data[0]);
 	qlcnic_83xx_process_rcv_diag(adapter, ring, sts_data);
 	desc = &sds_ring->desc_head[consumer];
 	desc->status_desc_data[0] = cpu_to_le64(STATUS_OWNER_PHANTOM);
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 133e9e35be9e..4c83739d158f 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -104,7 +104,8 @@ int efx_farch_test_registers(struct efx_nic *efx,
 			     const struct efx_farch_register_test *regs,
 			     size_t n_regs)
 {
-	unsigned address = 0, i, j;
+	unsigned address = 0;
+	int i, j;
 	efx_oword_t mask, imask, original, reg, buf;
 
 	for (i = 0; i < n_regs; ++i) {
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index 7f295c4d7b80..2a9228a6e4a0 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -189,11 +189,12 @@ static u32 mcdi_to_ethtool_cap(u32 media, u32 cap)
 
 	case MC_CMD_MEDIA_XFP:
 	case MC_CMD_MEDIA_SFP_PLUS:
-		result |= SUPPORTED_FIBRE;
-		break;
-
 	case MC_CMD_MEDIA_QSFP_PLUS:
 		result |= SUPPORTED_FIBRE;
+		if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+			result |= SUPPORTED_1000baseT_Full;
+		if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+			result |= SUPPORTED_10000baseT_Full;
 		if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
 			result |= SUPPORTED_40000baseCR4_Full;
 		break;
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8af25563f627..b5ab5e120bca 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -116,7 +116,6 @@ struct smsc911x_data {
 
 	struct phy_device *phy_dev;
 	struct mii_bus *mii_bus;
-	int phy_irq[PHY_MAX_ADDR];
 	unsigned int using_extphy;
 	int last_duplex;
 	int last_carrier;
@@ -1073,7 +1072,6 @@ static int smsc911x_mii_init(struct platform_device *pdev,
 	pdata->mii_bus->priv = pdata;
 	pdata->mii_bus->read = smsc911x_mii_read;
 	pdata->mii_bus->write = smsc911x_mii_write;
-	memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus));
 
 	pdata->mii_bus->parent = &pdev->dev;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 4f7283d05588..44da877d2483 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -156,7 +156,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 		struct netdev_hw_addr *ha;
 
 		netdev_for_each_uc_addr(ha, dev) {
-			dwmac4_set_umac_addr(ioaddr, ha->addr, reg);
+			dwmac4_set_umac_addr(hw, ha->addr, reg);
 			reg++;
 		}
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index eac45d0c75e2..e4071265be76 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2804,7 +2804,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 				priv->tx_path_in_lpi_mode = true;
 			if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
 				priv->tx_path_in_lpi_mode = false;
-			if (status & CORE_IRQ_MTL_RX_OVERFLOW)
+			if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
 				priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
 							priv->rx_tail_addr,
 							STMMAC_CHAN0);
@@ -3450,8 +3450,6 @@ int stmmac_resume(struct device *dev)
 	if (!netif_running(ndev))
 		return 0;
 
-	spin_lock_irqsave(&priv->lock, flags);
-
 	/* Power Down bit, into the PM register, is cleared
 	 * automatically as soon as a magic packet or a Wake-up frame
 	 * is received. Anyway, it's better to manually clear
@@ -3459,7 +3457,9 @@ int stmmac_resume(struct device *dev)
 	 * from another devices (e.g. serial console).
 	 */
 	if (device_may_wakeup(priv->device)) {
+		spin_lock_irqsave(&priv->lock, flags);
 		priv->hw->mac->pmt(priv->hw, 0);
+		spin_unlock_irqrestore(&priv->lock, flags);
 		priv->irq_wake = 0;
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
@@ -3473,6 +3473,8 @@ int stmmac_resume(struct device *dev)
 
 	netif_device_attach(ndev);
 
+	spin_lock_irqsave(&priv->lock, flags);
+
 	priv->cur_rx = 0;
 	priv->dirty_rx = 0;
 	priv->dirty_tx = 0;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 4b08a2f52b3e..53190894f17a 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1339,7 +1339,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	if (priv->coal_intvl != 0) {
 		struct ethtool_coalesce coal;
 
-		coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
+		coal.rx_coalesce_usecs = priv->coal_intvl;
 		cpsw_set_coalesce(ndev, &coal);
 	}
 
@@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev)
 clean_ale_ret:
 	cpsw_ale_destroy(priv->ale);
 clean_dma_ret:
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
@@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 
 	cpsw_ale_destroy(priv->ale);
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 	pm_runtime_disable(&pdev->dev);
 	device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 0a15acc075b3..11213a38c795 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -462,7 +462,7 @@ static void tile_tx_timestamp(struct sk_buff *skb, int instance)
 	if (unlikely((shtx->tx_flags & SKBTX_HW_TSTAMP) != 0)) {
 		struct mpipe_data *md = &mpipe_data[instance];
 		struct skb_shared_hwtstamps shhwtstamps;
-		struct timespec ts;
+		struct timespec64 ts;
 
 		shtx->tx_flags |= SKBTX_IN_PROGRESS;
 		gxio_mpipe_get_timestamp(&md->context, &ts);
@@ -886,9 +886,9 @@ static struct ptp_clock_info ptp_mpipe_caps = {
 /* Sync mPIPE's timestamp up with Linux system time and register PTP clock. */
 static void register_ptp_clock(struct net_device *dev, struct mpipe_data *md)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 
-	getnstimeofday(&ts);
+	ktime_get_ts64(&ts);
 	gxio_mpipe_set_timestamp(&md->context, &ts);
 
 	mutex_init(&md->ptp_lock);
diff --git a/drivers/net/fddi/skfp/Makefile b/drivers/net/fddi/skfp/Makefile
index b0be0234abf6..a957a1c7e5ba 100644
--- a/drivers/net/fddi/skfp/Makefile
+++ b/drivers/net/fddi/skfp/Makefile
@@ -17,4 +17,4 @@ skfp-objs :=  skfddi.o    hwmtm.o    fplustm.o  smt.o      cfm.o     \
 #   projects. To keep the source common for all those drivers (and
 #   thus simplify fixes to it), please do not clean it up!
 
-ccflags-y := -Idrivers/net/skfp -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
+ccflags-y := -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index cadefe4fdaa2..9b3dc3c61e00 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -958,8 +958,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -1048,8 +1048,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 #endif
@@ -1072,12 +1072,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
+	struct geneve_dev *geneve = netdev_priv(dev);
 	/* The max_mtu calculation does not take account of GENEVE
 	 * options, to avoid excluding potentially valid
 	 * configurations.
 	 */
-	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
-		- dev->hard_header_len;
+	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
+
+	if (geneve->remote.sa.sa_family == AF_INET6)
+		max_mtu -= sizeof(struct ipv6hdr);
+	else
+		max_mtu -= sizeof(struct iphdr);
 
 	if (new_mtu < 68)
 		return -EINVAL;
@@ -1508,6 +1513,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 {
 	struct nlattr *tb[IFLA_MAX + 1];
 	struct net_device *dev;
+	LIST_HEAD(list_kill);
 	int err;
 
 	memset(tb, 0, sizeof(tb));
@@ -1519,8 +1525,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
 			       0, 0, 0, 0, htons(dst_port), true,
 			       GENEVE_F_UDP_ZERO_CSUM6_RX);
-	if (err)
-		goto err;
+	if (err) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
@@ -1529,10 +1537,15 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	if (err)
 		goto err;
 
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0)
+		goto err;
+
 	return dev;
 
  err:
-	free_netdev(dev);
+	geneve_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 47ee2c840b55..8bcd78f94966 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -605,12 +605,41 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
 	dev_put(dev);
 }
 
+static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
+					     unsigned char **iv,
+					     struct scatterlist **sg)
+{
+	size_t size, iv_offset, sg_offset;
+	struct aead_request *req;
+	void *tmp;
+
+	size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm);
+	iv_offset = size;
+	size += GCM_AES_IV_LEN;
+
+	size = ALIGN(size, __alignof__(struct scatterlist));
+	sg_offset = size;
+	size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+
+	tmp = kmalloc(size, GFP_ATOMIC);
+	if (!tmp)
+		return NULL;
+
+	*iv = (unsigned char *)(tmp + iv_offset);
+	*sg = (struct scatterlist *)(tmp + sg_offset);
+	req = tmp;
+
+	aead_request_set_tfm(req, tfm);
+
+	return req;
+}
+
 static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct ethhdr *eth;
 	struct macsec_eth_header *hh;
 	size_t unprotected_len;
@@ -668,8 +697,6 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 	macsec_fill_sectag(hh, secy, pn);
 	macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN);
 
-	macsec_fill_iv(iv, secy->sci, pn);
-
 	skb_put(skb, secy->icv_len);
 
 	if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) {
@@ -684,13 +711,15 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	req = aead_request_alloc(tx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		macsec_txsa_put(tx_sa);
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	macsec_fill_iv(iv, secy->sci, pn);
+
 	sg_init_table(sg, MAX_SKB_FRAGS + 1);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
@@ -861,7 +890,6 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err)
 out:
 	macsec_rxsa_put(rx_sa);
 	dev_put(dev);
-	return;
 }
 
 static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
@@ -871,8 +899,8 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 				      struct macsec_secy *secy)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct aead_request *req;
 	struct macsec_eth_header *hdr;
 	u16 icv_len = secy->icv_len;
@@ -882,7 +910,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
@@ -1234,7 +1262,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
 	struct crypto_aead *tfm;
 	int ret;
 
-	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 	if (!tfm || IS_ERR(tfm))
 		return NULL;
 
@@ -2612,6 +2640,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 		u64_stats_update_begin(&secy_stats->syncp);
 		secy_stats->stats.OutPktsUntagged++;
 		u64_stats_update_end(&secy_stats->syncp);
+		skb->dev = macsec->real_dev;
 		len = skb->len;
 		ret = dev_queue_xmit(skb);
 		count_tx(dev, ret, len);
@@ -3361,6 +3390,7 @@ static void __exit macsec_exit(void)
 	genl_unregister_family(&macsec_fam);
 	rtnl_link_unregister(&macsec_link_ops);
 	unregister_netdevice_notifier(&macsec_notifier);
+	rcu_barrier();
 }
 
 module_init(macsec_init);
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 2afa61b51d41..91177a4a32ad 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -57,6 +57,7 @@
 
 /* PHY CTRL bits */
 #define DP83867_PHYCR_FIFO_DEPTH_SHIFT		14
+#define DP83867_PHYCR_FIFO_DEPTH_MASK		(3 << 14)
 
 /* RGMIIDCTL bits */
 #define DP83867_RGMII_TX_CLK_DELAY_SHIFT	4
@@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev)
 static int dp83867_config_init(struct phy_device *phydev)
 {
 	struct dp83867_private *dp83867;
-	int ret;
-	u16 val, delay;
+	int ret, val;
+	u16 delay;
 
 	if (!phydev->priv) {
 		dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
@@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev)
 	}
 
 	if (phy_interface_is_rgmii(phydev)) {
-		ret = phy_write(phydev, MII_DP83867_PHYCTRL,
-			(dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT));
+		val = phy_read(phydev, MII_DP83867_PHYCTRL);
+		if (val < 0)
+			return val;
+		val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK;
+		val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT);
+		ret = phy_write(phydev, MII_DP83867_PHYCTRL, val);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 2d2e4339f0df..9ec7f7353434 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/gpio.h>
+#include <linux/idr.h>
 
 #define MII_REGS_NUM 29
 
@@ -286,6 +287,8 @@ err_regs:
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+static DEFINE_IDA(phy_fixed_ida);
+
 static void fixed_phy_del(int phy_addr)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
@@ -297,14 +300,12 @@ static void fixed_phy_del(int phy_addr)
 			if (gpio_is_valid(fp->link_gpio))
 				gpio_free(fp->link_gpio);
 			kfree(fp);
+			ida_simple_remove(&phy_fixed_ida, phy_addr);
 			return;
 		}
 	}
 }
 
-static int phy_fixed_addr;
-static DEFINE_SPINLOCK(phy_fixed_addr_lock);
-
 struct phy_device *fixed_phy_register(unsigned int irq,
 				      struct fixed_phy_status *status,
 				      int link_gpio,
@@ -319,17 +320,15 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 		return ERR_PTR(-EPROBE_DEFER);
 
 	/* Get the next available PHY address, up to PHY_MAX_ADDR */
-	spin_lock(&phy_fixed_addr_lock);
-	if (phy_fixed_addr == PHY_MAX_ADDR) {
-		spin_unlock(&phy_fixed_addr_lock);
-		return ERR_PTR(-ENOSPC);
-	}
-	phy_addr = phy_fixed_addr++;
-	spin_unlock(&phy_fixed_addr_lock);
+	phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
+	if (phy_addr < 0)
+		return ERR_PTR(phy_addr);
 
 	ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
-	if (ret < 0)
+	if (ret < 0) {
+		ida_simple_remove(&phy_fixed_ida, phy_addr);
 		return ERR_PTR(ret);
+	}
 
 	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
 	if (IS_ERR(phy)) {
@@ -434,6 +433,7 @@ static void __exit fixed_mdio_bus_exit(void)
 		list_del(&fp->node);
 		kfree(fp);
 	}
+	ida_destroy(&phy_fixed_ida);
 }
 module_exit(fixed_mdio_bus_exit);
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 280e8795b463..ec2c1eee6405 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -285,6 +285,48 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1111_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* The Marvell PHY has an errata which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	err = marvell_set_polarity(phydev, phydev->mdix);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, MII_M1111_PHY_LED_CONTROL,
+			MII_M1111_PHY_LED_DIRECT);
+	if (err < 0)
+		return err;
+
+	err = genphy_config_aneg(phydev);
+	if (err < 0)
+		return err;
+
+	if (phydev->autoneg != AUTONEG_ENABLE) {
+		int bmcr;
+
+		/* A write to speed/duplex bits (that is performed by
+		 * genphy_config_aneg() call above) must be followed by
+		 * a software reset. Otherwise, the write has no effect.
+		 */
+		bmcr = phy_read(phydev, MII_BMCR);
+		if (bmcr < 0)
+			return bmcr;
+
+		err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_OF_MDIO
 /*
  * Set and/or override some configuration registers based on the
@@ -407,15 +449,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
-
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
-	phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF);
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
-
-	err = genphy_config_aneg(phydev);
-
-	return err;
+	return genphy_config_aneg(phydev);
 }
 
 static int m88e1318_config_aneg(struct phy_device *phydev)
@@ -636,6 +670,28 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	return phy_write(phydev, MII_BMCR, BMCR_RESET);
 }
 
+static int m88e1121_config_init(struct phy_device *phydev)
+{
+	int err, oldpage;
+
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
+
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
+	if (err < 0)
+		return err;
+
+	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
+	err = phy_write(phydev, MII_88E1121_PHY_LED_CTRL,
+			MII_88E1121_PHY_LED_DEF);
+	if (err < 0)
+		return err;
+
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
+
+	/* Set marvell,reg-init configuration from device tree */
+	return marvell_config_init(phydev);
+}
+
 static int m88e1510_config_init(struct phy_device *phydev)
 {
 	int err;
@@ -668,7 +724,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
 			return err;
 	}
 
-	return marvell_config_init(phydev);
+	return m88e1121_config_init(phydev);
 }
 
 static int m88e1118_config_aneg(struct phy_device *phydev)
@@ -1161,7 +1217,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &m88e1111_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1111_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -1196,7 +1252,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1215,7 +1271,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 2e21e9366f76..b62c4aaee40b 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev)
 	 * in all capable mode before using it.
 	 */
 	if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) {
-		int timeout = 50000;
-
-		/* set "all capable" mode and reset the phy */
+		/* set "all capable" mode */
 		rc |= MII_LAN83C185_MODE_ALL;
 		phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc);
-		phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-		/* wait end of reset (max 500 ms) */
-		do {
-			udelay(10);
-			if (timeout-- == 0)
-				return -1;
-			rc = phy_read(phydev, MII_BMCR);
-		} while (rc & BMCR_RESET);
 	}
-	return 0;
+
+	/* reset the phy */
+	return genphy_soft_reset(phydev);
 }
 
 static int lan911x_config_init(struct phy_device *phydev)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 8dedafa1a95d..a30ee427efab 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2601,8 +2601,6 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
-	put_net(pch->chan_net);
-	pch->chan_net = NULL;
 
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
@@ -3136,6 +3134,9 @@ ppp_disconnect_channel(struct channel *pch)
  */
 static void ppp_destroy_channel(struct channel *pch)
 {
+	put_net(pch->chan_net);
+	pch->chan_net = NULL;
+
 	atomic_dec(&channel_count);
 
 	if (!pch->file.dead) {
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2ace126533cd..fdee77207323 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1203,8 +1203,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_dev_open;
 	}
 
+	netif_addr_lock_bh(dev);
 	dev_uc_sync_multiple(port_dev, dev);
 	dev_mc_sync_multiple(port_dev, dev);
+	netif_addr_unlock_bh(dev);
 
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 53759c315b97..877c9516e781 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -854,6 +854,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 	if (cdc_ncm_init(dev))
 		goto error2;
 
+	/* Some firmwares need a pause here or they will silently fail
+	 * to set up the interface properly.  This value was decided
+	 * empirically on a Sierra Wireless MC7455 running 02.08.02.00
+	 * firmware.
+	 */
+	usleep_range(10000, 20000);
+
 	/* configure data interface */
 	temp = usb_set_interface(dev->udev, iface_no, data_altsetting);
 	if (temp) {
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3f9f6ed3eec4..e9654a685381 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -26,12 +26,13 @@
 #include <linux/mdio.h>
 #include <linux/usb/cdc.h>
 #include <linux/suspend.h>
+#include <linux/acpi.h>
 
 /* Information for net-next */
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"3"
+#define NET_VERSION		"5"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -116,6 +117,7 @@
 #define USB_TX_DMA		0xd434
 #define USB_TOLERANCE		0xd490
 #define USB_LPM_CTRL		0xd41a
+#define USB_BMU_RESET		0xd4b0
 #define USB_UPS_CTRL		0xd800
 #define USB_MISC_0		0xd81a
 #define USB_POWER_CUT		0xd80a
@@ -338,6 +340,10 @@
 #define TEST_MODE_DISABLE	0x00000001
 #define TX_SIZE_ADJUST1		0x00000100
 
+/* USB_BMU_RESET */
+#define BMU_RESET_EP_IN		0x01
+#define BMU_RESET_EP_OUT	0x02
+
 /* USB_UPS_CTRL */
 #define POWER_CUT		0x0100
 
@@ -455,6 +461,11 @@
 /* SRAM_IMPEDANCE */
 #define RX_DRIVING_MASK		0x6000
 
+/* MAC PASSTHRU */
+#define AD_MASK			0xfee0
+#define EFUSE			0xcfdb
+#define PASS_THRU_MASK		0x1
+
 enum rtl_register_content {
 	_1000bps	= 0x10,
 	_100bps		= 0x08,
@@ -619,6 +630,7 @@ struct r8152 {
 		int (*eee_get)(struct r8152 *, struct ethtool_eee *);
 		int (*eee_set)(struct r8152 *, struct ethtool_eee *);
 		bool (*in_nway)(struct r8152 *);
+		void (*autosuspend_en)(struct r8152 *tp, bool enable);
 	} rtl_ops;
 
 	int intr_interval;
@@ -1030,6 +1042,65 @@ out1:
 	return ret;
 }
 
+/* Devices containing RTL8153-AD can support a persistent
+ * host system provided MAC address.
+ * Examples of this are Dell TB15 and Dell WD15 docks
+ */
+static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	int ret = -EINVAL;
+	u32 ocp_data;
+	unsigned char buf[6];
+
+	/* test for -AD variant of RTL8153 */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
+	if ((ocp_data & AD_MASK) != 0x1000)
+		return -ENODEV;
+
+	/* test for MAC address pass-through bit */
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, EFUSE);
+	if ((ocp_data & PASS_THRU_MASK) != 1)
+		return -ENODEV;
+
+	/* returns _AUXMAC_#AABBCCDDEEFF# */
+	status = acpi_evaluate_object(NULL, "\\_SB.AMAC", NULL, &buffer);
+	obj = (union acpi_object *)buffer.pointer;
+	if (!ACPI_SUCCESS(status))
+		return -ENODEV;
+	if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid buffer when reading pass-thru MAC addr: "
+			   "(%d, %d)\n",
+			   obj->type, obj->string.length);
+		goto amacout;
+	}
+	if (strncmp(obj->string.pointer, "_AUXMAC_#", 9) != 0 ||
+	    strncmp(obj->string.pointer + 0x15, "#", 1) != 0) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid header when reading pass-thru MAC addr\n");
+		goto amacout;
+	}
+	ret = hex2bin(buf, obj->string.pointer + 9, 6);
+	if (!(ret == 0 && is_valid_ether_addr(buf))) {
+		netif_warn(tp, probe, tp->netdev,
+			   "Invalid MAC when reading pass-thru MAC addr: "
+			   "%d, %pM\n", ret, buf);
+		ret = -EINVAL;
+		goto amacout;
+	}
+	memcpy(sa->sa_data, buf, 6);
+	ether_addr_copy(tp->netdev->dev_addr, sa->sa_data);
+	netif_info(tp, probe, tp->netdev,
+		   "Using pass-thru MAC addr %pM\n", sa->sa_data);
+
+amacout:
+	kfree(obj);
+	return ret;
+}
+
 static int set_ethernet_addr(struct r8152 *tp)
 {
 	struct net_device *dev = tp->netdev;
@@ -1038,8 +1109,15 @@ static int set_ethernet_addr(struct r8152 *tp)
 
 	if (tp->version == RTL_VER_01)
 		ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
-	else
-		ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa.sa_data);
+	else {
+		/* if this is not an RTL8153-AD, no eFuse mac pass thru set,
+		 * or system doesn't provide valid _SB.AMAC this will be
+		 * be expected to non-zero
+		 */
+		ret = vendor_mac_passthru_addr_read(tp, &sa);
+		if (ret < 0)
+			ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa.sa_data);
+	}
 
 	if (ret < 0) {
 		netif_err(tp, probe, dev, "Get ether addr fail\n");
@@ -2169,7 +2247,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
 	u32 mtu = tp->netdev->mtu;
-	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4;
+	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
 }
@@ -2290,10 +2368,6 @@ static u32 __rtl_get_wol(struct r8152 *tp)
 	u32 ocp_data;
 	u32 wolopts = 0;
 
-	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG5);
-	if (!(ocp_data & LAN_WAKE_EN))
-		return 0;
-
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
 	if (ocp_data & LINK_ON_WAKE_EN)
 		wolopts |= WAKE_PHY;
@@ -2326,15 +2400,13 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5);
-	ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN | LAN_WAKE_EN);
+	ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN);
 	if (wolopts & WAKE_UCAST)
 		ocp_data |= UWF_EN;
 	if (wolopts & WAKE_BCAST)
 		ocp_data |= BWF_EN;
 	if (wolopts & WAKE_MCAST)
 		ocp_data |= MWF_EN;
-	if (wolopts & WAKE_ANY)
-		ocp_data |= LAN_WAKE_EN;
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data);
 
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
@@ -2403,9 +2475,6 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
 	if (enable) {
 		u32 ocp_data;
 
-		r8153_u1u2en(tp, false);
-		r8153_u2p3en(tp, false);
-
 		__rtl_set_wol(tp, WAKE_ANY);
 
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
@@ -2416,7 +2485,28 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
 
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
 	} else {
+		u32 ocp_data;
+
 		__rtl_set_wol(tp, tp->saved_wolopts);
+
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
+		ocp_data &= ~LINK_OFF_WAKE_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
+
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
+	}
+}
+
+static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
+{
+	rtl_runtime_suspend_enable(tp, enable);
+
+	if (enable) {
+		r8153_u1u2en(tp, false);
+		r8153_u2p3en(tp, false);
+	} else {
 		r8153_u2p3en(tp, true);
 		r8153_u1u2en(tp, true);
 	}
@@ -2456,6 +2546,17 @@ static void r8153_teredo_off(struct r8152 *tp)
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
 }
 
+static void rtl_reset_bmu(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET);
+	ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT);
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+	ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+}
+
 static void r8152_aldps_en(struct r8152 *tp, bool enable)
 {
 	if (enable) {
@@ -2681,6 +2782,7 @@ static void r8153_first_init(struct r8152 *tp)
 	r8153_hw_phy_cfg(tp);
 
 	rtl8152_nic_reset(tp);
+	rtl_reset_bmu(tp);
 
 	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 	ocp_data &= ~NOW_IS_OOB;
@@ -2742,6 +2844,7 @@ static void r8153_enter_oob(struct r8152 *tp)
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 
 	for (i = 0; i < 1000; i++) {
 		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
@@ -2803,6 +2906,7 @@ static void rtl8153_disable(struct r8152 *tp)
 {
 	r8153_aldps_en(tp, false);
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 	r8153_aldps_en(tp, true);
 	usb_enable_lpm(tp->udev);
 }
@@ -3382,15 +3486,11 @@ static void r8153_init(struct r8152 *tp)
 	r8153_power_cut_en(tp, false);
 	r8153_u1u2en(tp, true);
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
-		       PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
-		       U1U2_SPDWN_EN | L1_SPDWN_EN);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
-		       PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
-		       TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN |
-		       EEE_SPDWN_EN);
+	/* MAC clock speed down */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
 
 	r8153_enable_eee(tp);
 	r8153_aldps_en(tp, true);
@@ -3497,7 +3597,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
 		napi_disable(&tp->napi);
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 			rtl_stop_rx(tp);
-			rtl_runtime_suspend_enable(tp, true);
+			tp->rtl_ops.autosuspend_en(tp, true);
 		} else {
 			cancel_delayed_work_sync(&tp->schedule);
 			tp->rtl_ops.down(tp);
@@ -3523,7 +3623,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 
 	if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
 		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
-			rtl_runtime_suspend_enable(tp, false);
+			tp->rtl_ops.autosuspend_en(tp, false);
 			clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 			napi_disable(&tp->napi);
 			set_bit(WORK_ENABLE, &tp->flags);
@@ -3542,7 +3642,7 @@ static int rtl8152_resume(struct usb_interface *intf)
 		usb_submit_urb(tp->intr_urb, GFP_KERNEL);
 	} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
 		if (tp->netdev->flags & IFF_UP)
-			rtl_runtime_suspend_enable(tp, false);
+			tp->rtl_ops.autosuspend_en(tp, false);
 		clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 	}
 
@@ -4122,6 +4222,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->eee_get		= r8152_get_eee;
 		ops->eee_set		= r8152_set_eee;
 		ops->in_nway		= rtl8152_in_nway;
+		ops->autosuspend_en	= rtl_runtime_suspend_enable;
 		break;
 
 	case RTL_VER_03:
@@ -4137,6 +4238,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->eee_get		= r8153_get_eee;
 		ops->eee_set		= r8153_set_eee;
 		ops->in_nway		= rtl8153_in_nway;
+		ops->autosuspend_en	= rtl8153_runtime_enable;
 		break;
 
 	default:
@@ -4323,3 +4425,4 @@ module_usb_driver(rtl8152_driver);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 61ba46404937..6086a0163249 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -395,8 +395,11 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu)
 	dev->hard_mtu = net->mtu + net->hard_header_len;
 	if (dev->rx_urb_size == old_hard_mtu) {
 		dev->rx_urb_size = dev->hard_mtu;
-		if (dev->rx_urb_size > old_rx_urb_size)
+		if (dev->rx_urb_size > old_rx_urb_size) {
+			usbnet_pause_rx(dev);
 			usbnet_unlink_rx_urbs(dev);
+			usbnet_resume_rx(dev);
+		}
 	}
 
 	/* max qlen depend on hard_mtu and rx_urb_size */
@@ -1508,8 +1511,9 @@ static void usbnet_bh (unsigned long param)
 	} else if (netif_running (dev->net) &&
 		   netif_device_present (dev->net) &&
 		   netif_carrier_ok(dev->net) &&
-		   !timer_pending (&dev->delay) &&
-		   !test_bit (EVENT_RX_HALT, &dev->flags)) {
+		   !timer_pending(&dev->delay) &&
+		   !test_bit(EVENT_RX_PAUSED, &dev->flags) &&
+		   !test_bit(EVENT_RX_HALT, &dev->flags)) {
 		int	temp = dev->rxq.qlen;
 
 		if (temp < RX_QLEN(dev)) {
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index db8022ae415b..08885bc8d6db 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1369,7 +1369,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 				rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
 
 				segCnt = rcdlro->segCnt;
-				BUG_ON(segCnt <= 1);
+				WARN_ON_ONCE(segCnt == 0);
 				mss = rcdlro->mss;
 				if (unlikely(segCnt <= 1))
 					segCnt = 0;
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index c4825392d64b..3d2b64e63408 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.7.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.8.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040700
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040800
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index dff08842f26d..8bd8c7e1ee87 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -304,7 +304,7 @@ static int vrf_rt6_create(struct net_device *dev)
 	dst_hold(&rt6->dst);
 
 	rt6->rt6i_table = rt6i_table;
-	rt6->dst.output	= vrf_output6;
+	rt6->dst.output = vrf_output6;
 	rcu_assign_pointer(vrf->rt6, rt6);
 
 	rc = 0;
@@ -403,7 +403,7 @@ static int vrf_rtable_create(struct net_device *dev)
 	if (!rth)
 		return -ENOMEM;
 
-	rth->dst.output	= vrf_output;
+	rth->dst.output = vrf_output;
 	rth->rt_table_id = vrf->tb_id;
 
 	rcu_assign_pointer(vrf->rth, rth);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f999db2f97b4..b3b9db68f758 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2952,30 +2952,6 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	return 0;
 }
 
-struct net_device *vxlan_dev_create(struct net *net, const char *name,
-				    u8 name_assign_type, struct vxlan_config *conf)
-{
-	struct nlattr *tb[IFLA_MAX+1];
-	struct net_device *dev;
-	int err;
-
-	memset(&tb, 0, sizeof(tb));
-
-	dev = rtnl_create_link(net, name, name_assign_type,
-			       &vxlan_link_ops, tb);
-	if (IS_ERR(dev))
-		return dev;
-
-	err = vxlan_dev_configure(net, dev, conf);
-	if (err < 0) {
-		free_netdev(dev);
-		return ERR_PTR(err);
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL_GPL(vxlan_dev_create);
-
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
@@ -3268,6 +3244,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.get_link_net	= vxlan_get_link_net,
 };
 
+struct net_device *vxlan_dev_create(struct net *net, const char *name,
+				    u8 name_assign_type,
+				    struct vxlan_config *conf)
+{
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct net_device *dev;
+	int err;
+
+	memset(&tb, 0, sizeof(tb));
+
+	dev = rtnl_create_link(net, name, name_assign_type,
+			       &vxlan_link_ops, tb);
+	if (IS_ERR(dev))
+		return dev;
+
+	err = vxlan_dev_configure(net, dev, conf);
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
+
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0) {
+		LIST_HEAD(list_kill);
+
+		vxlan_dellink(dev, &list_kill);
+		unregister_netdevice_many(&list_kill);
+		return ERR_PTR(err);
+	}
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(vxlan_dev_create);
+
 static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
 					     struct net_device *dev)
 {
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 49af62428c88..a92a0ba829f5 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -1083,7 +1083,7 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name,
 			}
 
 			ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "",
-					ar->running_fw->fw_file.fw_features,
+					fw_file->fw_features,
 					sizeof(fw_file->fw_features));
 			break;
 		case ATH10K_FW_IE_FW_IMAGE:
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index cc979a4faeb0..813cdd2621a1 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1904,7 +1904,6 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			return;
 		}
 	}
-	ath10k_htt_rx_msdu_buff_replenish(htt);
 }
 
 static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 6dd1d26b357f..4040f9413e86 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -679,10 +679,10 @@ static int ath10k_peer_create(struct ath10k *ar,
 
 	peer = ath10k_peer_find(ar, vdev_id, addr);
 	if (!peer) {
+		spin_unlock_bh(&ar->data_lock);
 		ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n",
 			    addr, vdev_id);
 		ath10k_wmi_peer_delete(ar, vdev_id, addr);
-		spin_unlock_bh(&ar->data_lock);
 		return -ENOENT;
 	}
 
diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index 9272ca90632b..80ff69f99229 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h
@@ -1122,12 +1122,12 @@ enum {
 #define AR9300_NUM_GPIO                          16
 #define AR9330_NUM_GPIO				 16
 #define AR9340_NUM_GPIO				 23
-#define AR9462_NUM_GPIO				 10
+#define AR9462_NUM_GPIO				 14
 #define AR9485_NUM_GPIO				 12
 #define AR9531_NUM_GPIO				 18
 #define AR9550_NUM_GPIO				 24
 #define AR9561_NUM_GPIO				 23
-#define AR9565_NUM_GPIO				 12
+#define AR9565_NUM_GPIO				 14
 #define AR9580_NUM_GPIO				 16
 #define AR7010_NUM_GPIO                          16
 
@@ -1139,12 +1139,12 @@ enum {
 #define AR9300_GPIO_MASK			 0x0000F4FF
 #define AR9330_GPIO_MASK			 0x0000F4FF
 #define AR9340_GPIO_MASK			 0x0000000F
-#define AR9462_GPIO_MASK			 0x000003FF
+#define AR9462_GPIO_MASK			 0x00003FFF
 #define AR9485_GPIO_MASK			 0x00000FFF
 #define AR9531_GPIO_MASK			 0x0000000F
 #define AR9550_GPIO_MASK			 0x0000000F
 #define AR9561_GPIO_MASK			 0x0000000F
-#define AR9565_GPIO_MASK			 0x00000FFF
+#define AR9565_GPIO_MASK			 0x00003FFF
 #define AR9580_GPIO_MASK			 0x0000F4FF
 #define AR7010_GPIO_MASK			 0x0000FFFF
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index d0631b6cfd53..62f475e31077 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2540,12 +2540,14 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			   const u8 *mac, struct station_info *sinfo)
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
+	struct brcmf_scb_val_le scb_val;
 	s32 err = 0;
 	struct brcmf_sta_info_le sta_info_le;
 	u32 sta_flags;
 	u32 is_tdls_peer;
 	s32 total_rssi;
 	s32 count_rssi;
+	int rssi;
 	u32 i;
 
 	brcmf_dbg(TRACE, "Enter, MAC %pM\n", mac);
@@ -2629,6 +2631,20 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 			total_rssi /= count_rssi;
 			sinfo->signal = total_rssi;
+		} else if (test_bit(BRCMF_VIF_STATUS_CONNECTED,
+			&ifp->vif->sme_state)) {
+			memset(&scb_val, 0, sizeof(scb_val));
+			err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_RSSI,
+						     &scb_val, sizeof(scb_val));
+			if (err) {
+				brcmf_err("Could not get rssi (%d)\n", err);
+				goto done;
+			} else {
+				rssi = le32_to_cpu(scb_val.val);
+				sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+				sinfo->signal = rssi;
+				brcmf_dbg(CONN, "RSSI %d dBm\n", rssi);
+			}
 		}
 	}
 done:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
index 68f1ce02f4bf..2b9a2bc429d6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
@@ -1157,6 +1157,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
 		brcmu_pkt_buf_free_skb(skb);
 		return;
 	}
+
+	skb->protocol = eth_type_trans(skb, ifp->ndev);
 	brcmf_netif_rx(ifp, skb);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index e5f267b21316..18a8474b5760 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3851,8 +3851,8 @@ static int iwl_mvm_mac_get_survey(struct ieee80211_hw *hw, int idx,
 	if (idx != 0)
 		return -ENOENT;
 
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
+	if (!fw_has_capa(&mvm->fw->ucode_capa,
+			 IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
 		return -ENOENT;
 
 	mutex_lock(&mvm->mutex);
@@ -3898,8 +3898,8 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw,
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
+	if (!fw_has_capa(&mvm->fw->ucode_capa,
+			 IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
 		return;
 
 	/* if beacon filtering isn't on mac80211 does it anyway */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index ac2c5718e454..2c61516d06ff 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -581,7 +581,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
 			    struct iwl_rx_mpdu_desc *desc)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+	struct iwl_mvm_sta *mvm_sta;
 	struct iwl_mvm_baid_data *baid_data;
 	struct iwl_mvm_reorder_buffer *buffer;
 	struct sk_buff *tail;
@@ -604,6 +604,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
 	if (WARN_ON(IS_ERR_OR_NULL(sta)))
 		return false;
 
+	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+
 	/* not a data packet */
 	if (!ieee80211_is_data_qos(hdr->frame_control) ||
 	    is_multicast_ether_addr(hdr->addr1))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 6f609dd5c222..e78fc567ff7d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1222,7 +1222,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
 	return -EIO;
 }
 
-#define SCAN_TIMEOUT (16 * HZ)
+#define SCAN_TIMEOUT (20 * HZ)
 
 void iwl_mvm_scan_timeout(unsigned long data)
 {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index fea4d3437e2f..b23ab4a4504f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1852,12 +1852,18 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm,
 	    mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) {
 		u8 sta_id = mvmvif->ap_sta_id;
 
+		sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id],
+					    lockdep_is_held(&mvm->mutex));
+
 		/*
 		 * It is possible that the 'sta' parameter is NULL,
 		 * for example when a GTK is removed - the sta_id will then
 		 * be the AP ID, and no station was passed by mac80211.
 		 */
-		return iwl_mvm_sta_from_staid_protected(mvm, sta_id);
+		if (IS_ERR_OR_NULL(sta))
+			return NULL;
+
+		return iwl_mvm_sta_from_mac80211(sta);
 	}
 
 	return NULL;
@@ -1955,6 +1961,14 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm,
 		struct ieee80211_key_seq seq;
 		const u8 *pn;
 
+		switch (keyconf->cipher) {
+		case WLAN_CIPHER_SUITE_AES_CMAC:
+			igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM);
+			break;
+		default:
+			return -EINVAL;
+		}
+
 		memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen);
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		pn = seq.aes_cmac.pn;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9ed0ed1bf514..4dd5adcdd29b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2776,6 +2776,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
 	if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] ||
 	    !info->attrs[HWSIM_ATTR_FLAGS] ||
 	    !info->attrs[HWSIM_ATTR_COOKIE] ||
+	    !info->attrs[HWSIM_ATTR_SIGNAL] ||
 	    !info->attrs[HWSIM_ATTR_TX_INFO])
 		goto out;
 
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index fe19ace0d6a0..b04cf30f3959 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -1149,7 +1149,7 @@ static void rtl8192eu_phy_iqcalibrate(struct rtl8xxxu_priv *priv,
 
 		for (i = 0; i < retry; i++) {
 			path_b_ok = rtl8192eu_rx_iqk_path_b(priv);
-			if (path_a_ok == 0x03) {
+			if (path_b_ok == 0x03) {
 				val32 = rtl8xxxu_read32(priv,
 							REG_RX_POWER_BEFORE_IQK_B_2);
 				result[t][6] = (val32 >> 16) & 0x3ff;
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 0f48048b8654..3a0faa8fe9d4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m);
 void rtl_addr_delay(u32 addr)
 {
 	if (addr == 0xfe)
-		msleep(50);
+		mdelay(50);
 	else if (addr == 0xfd)
 		msleep(5);
 	else if (addr == 0xfc)
@@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr,
 		rtl_addr_delay(addr);
 	} else {
 		rtl_set_rfreg(hw, rfpath, addr, mask, data);
-		usleep_range(1, 2);
+		udelay(1);
 	}
 }
 EXPORT_SYMBOL(rtl_rfreg_delay);
@@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data)
 		rtl_addr_delay(addr);
 	} else {
 		rtl_set_bbreg(hw, addr, MASKDWORD, data);
-		usleep_range(1, 2);
+		udelay(1);
 	}
 }
 EXPORT_SYMBOL(rtl_bb_delay);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f7718ec685fa..cea8350fbc7e 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
+	unsigned long align;
+	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
@@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -ENXIO;
 	}
 
+	align = le32_to_cpu(pfn_sb->align);
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	if (align == 0)
+		align = 1UL << ilog2(offset);
+	mode = le32_to_cpu(pfn_sb->mode);
+
 	if (!nd_pfn->uuid) {
-		/* from probe we allocate */
+		/*
+		 * When probing a namepace via nd_pfn_probe() the uuid
+		 * is NULL (see: nd_pfn_devinit()) we init settings from
+		 * pfn_sb
+		 */
 		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
 		if (!nd_pfn->uuid)
 			return -ENOMEM;
+		nd_pfn->align = align;
+		nd_pfn->mode = mode;
 	} else {
-		/* from init we validate */
+		/*
+		 * When probing a pfn / dax instance we validate the
+		 * live settings against the pfn_sb
+		 */
 		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
 			return -ENODEV;
+
+		/*
+		 * If the uuid validates, but other settings mismatch
+		 * return EINVAL because userspace has managed to change
+		 * the configuration without specifying new
+		 * identification.
+		 */
+		if (nd_pfn->align != align || nd_pfn->mode != mode) {
+			dev_err(&nd_pfn->dev,
+					"init failed, settings mismatch\n");
+			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
+					nd_pfn->align, align, nd_pfn->mode,
+					mode);
+			return -EINVAL;
+		}
 	}
 
-	if (nd_pfn->align == 0)
-		nd_pfn->align = le32_to_cpu(pfn_sb->align);
-	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+	if (align > nvdimm_namespace_capacity(ndns)) {
 		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
-				nd_pfn->align, nvdimm_namespace_capacity(ndns));
+				align, nvdimm_namespace_capacity(ndns));
 		return -EINVAL;
 	}
 
@@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	 * namespace has changed since the pfn superblock was
 	 * established.
 	 */
-	offset = le64_to_cpu(pfn_sb->dataoff);
 	nsio = to_nd_namespace_io(&ndns->dev);
 	if (offset >= resource_size(&nsio->res)) {
 		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
@@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EBUSY;
 	}
 
-	if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
+	if ((align && !IS_ALIGNED(offset, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
-		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
-				offset);
+		dev_err(&nd_pfn->dev,
+				"bad offset: %#llx dax disabled align: %#lx\n",
+				offset, align);
 		return -ENXIO;
 	}
 
@@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return ERR_PTR(-EINVAL);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1a51584a382b..d5fb55c0a9d9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1394,19 +1394,22 @@ static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return nsa->ns_id - nsb->ns_id;
 }
 
-static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
-	struct nvme_ns *ns;
-
-	lockdep_assert_held(&ctrl->namespaces_mutex);
+	struct nvme_ns *ns, *ret = NULL;
 
+	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
-		if (ns->ns_id == nsid)
-			return ns;
+		if (ns->ns_id == nsid) {
+			kref_get(&ns->kref);
+			ret = ns;
+			break;
+		}
 		if (ns->ns_id > nsid)
 			break;
 	}
-	return NULL;
+	mutex_unlock(&ctrl->namespaces_mutex);
+	return ret;
 }
 
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
@@ -1415,8 +1418,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct gendisk *disk;
 	int node = dev_to_node(ctrl->dev);
 
-	lockdep_assert_held(&ctrl->namespaces_mutex);
-
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
 		return;
@@ -1457,7 +1458,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
 
-	list_add_tail_rcu(&ns->list, &ctrl->namespaces);
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_add_tail(&ns->list, &ctrl->namespaces);
+	mutex_unlock(&ctrl->namespaces_mutex);
+
 	kref_get(&ctrl->kref);
 	if (ns->type == NVME_NS_LIGHTNVM)
 		return;
@@ -1480,8 +1484,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
@@ -1494,8 +1496,11 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
+
+	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
-	synchronize_rcu();
+	mutex_unlock(&ns->ctrl->namespaces_mutex);
+
 	nvme_put_ns(ns);
 }
 
@@ -1503,10 +1508,11 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
 
-	ns = nvme_find_ns(ctrl, nsid);
+	ns = nvme_find_get_ns(ctrl, nsid);
 	if (ns) {
 		if (revalidate_disk(ns->disk))
 			nvme_ns_remove(ns);
+		nvme_put_ns(ns);
 	} else
 		nvme_alloc_ns(ctrl, nsid);
 }
@@ -1535,9 +1541,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
 			nvme_validate_ns(ctrl, nsid);
 
 			while (++prev < nsid) {
-				ns = nvme_find_ns(ctrl, prev);
-				if (ns)
+				ns = nvme_find_get_ns(ctrl, prev);
+				if (ns) {
 					nvme_ns_remove(ns);
+					nvme_put_ns(ns);
+				}
 			}
 		}
 		nn -= j;
@@ -1552,8 +1560,6 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
 	struct nvme_ns *ns, *next;
 	unsigned i;
 
-	lockdep_assert_held(&ctrl->namespaces_mutex);
-
 	for (i = 1; i <= nn; i++)
 		nvme_validate_ns(ctrl, i);
 
@@ -1576,7 +1582,6 @@ static void nvme_scan_work(struct work_struct *work)
 	if (nvme_identify_ctrl(ctrl, &id))
 		return;
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	nn = le32_to_cpu(id->nn);
 	if (ctrl->vs >= NVME_VS(1, 1) &&
 	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
@@ -1585,6 +1590,7 @@ static void nvme_scan_work(struct work_struct *work)
 	}
 	nvme_scan_ns_sequential(ctrl, nn);
  done:
+	mutex_lock(&ctrl->namespaces_mutex);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	mutex_unlock(&ctrl->namespaces_mutex);
 	kfree(id);
@@ -1604,6 +1610,11 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
+/*
+ * This function iterates the namespace list unlocked to allow recovery from
+ * controller failure. It is up to the caller to ensure the namespace list is
+ * not modified by scan work while this function is executing.
+ */
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns, *next;
@@ -1617,10 +1628,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	if (ctrl->state == NVME_CTRL_DEAD)
 		nvme_kill_queues(ctrl);
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
-	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
@@ -1791,11 +1800,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
-		if (!kref_get_unless_zero(&ns->kref))
-			continue;
-
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
 		 * end buffered writers dirtying pages that can't be synced.
@@ -1806,10 +1812,8 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		blk_set_queue_dying(ns->queue);
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_mq_start_stopped_hw_queues(ns->queue, true);
-
-		nvme_put_ns(ns);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
@@ -1817,8 +1821,8 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		spin_lock_irq(ns->queue->queue_lock);
 		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
 		spin_unlock_irq(ns->queue->queue_lock);
@@ -1826,7 +1830,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
 		blk_mq_cancel_requeue_work(ns->queue);
 		blk_mq_stop_hw_queues(ns->queue);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_stop_queues);
 
@@ -1834,13 +1838,13 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
 		blk_mq_start_stopped_hw_queues(ns->queue, true);
 		blk_mq_kick_requeue_list(ns->queue);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 78dca3193ca4..befac5b19490 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1679,9 +1679,14 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	int bars;
+
 	if (dev->bar)
 		iounmap(dev->bar);
-	pci_release_regions(to_pci_dev(dev->dev));
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	pci_release_selected_regions(pdev, bars);
 }
 
 static void nvme_pci_disable(struct nvme_dev *dev)
@@ -1924,7 +1929,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
        return 0;
   release:
-       pci_release_regions(pdev);
+       pci_release_selected_regions(pdev, bars);
        return -ENODEV;
 }
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 14f2f8c7c260..33daffc4392c 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -395,7 +395,7 @@ static int unflatten_dt_nodes(const void *blob,
 			      struct device_node **nodepp)
 {
 	struct device_node *root;
-	int offset = 0, depth = 0;
+	int offset = 0, depth = 0, initial_depth = 0;
 #define FDT_MAX_DEPTH	64
 	unsigned int fpsizes[FDT_MAX_DEPTH];
 	struct device_node *nps[FDT_MAX_DEPTH];
@@ -405,11 +405,22 @@ static int unflatten_dt_nodes(const void *blob,
 	if (nodepp)
 		*nodepp = NULL;
 
+	/*
+	 * We're unflattening device sub-tree if @dad is valid. There are
+	 * possibly multiple nodes in the first level of depth. We need
+	 * set @depth to 1 to make fdt_next_node() happy as it bails
+	 * immediately when negative @depth is found. Otherwise, the device
+	 * nodes except the first one won't be unflattened successfully.
+	 */
+	if (dad)
+		depth = initial_depth = 1;
+
 	root = dad;
 	fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
 	nps[depth] = dad;
+
 	for (offset = 0;
-	     offset >= 0 && depth >= 0;
+	     offset >= 0 && depth >= initial_depth;
 	     offset = fdt_next_node(blob, offset, &depth)) {
 		if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))
 			continue;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index e7bfc175b8e1..6ec743faabe8 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -386,13 +386,13 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
 EXPORT_SYMBOL_GPL(of_irq_to_resource);
 
 /**
- * of_irq_get - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @index: zero-based index of the irq
- *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created.
+ * @index: zero-based index of the IRQ
  *
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get(struct device_node *dev, int index)
 {
@@ -413,12 +413,13 @@ int of_irq_get(struct device_node *dev, int index)
 EXPORT_SYMBOL_GPL(of_irq_get);
 
 /**
- * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @name: irq name
+ * @name: IRQ name
  *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created, or error code in case of any other failure.
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get_byname(struct device_node *dev, const char *name)
 {
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index ed01c0172e4a..216648233874 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -127,8 +127,15 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
 	}
 
 	/* Need adjust the alignment to satisfy the CMA requirement */
-	if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool"))
-		align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+	if (IS_ENABLED(CONFIG_CMA)
+	    && of_flat_dt_is_compatible(node, "shared-dma-pool")
+	    && of_get_flat_dt_prop(node, "reusable", NULL)
+	    && !of_get_flat_dt_prop(node, "no-map", NULL)) {
+		unsigned long order =
+			max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
+
+		align = max(align, (phys_addr_t)PAGE_SIZE << order);
+	}
 
 	prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
 	if (prop) {
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index dfbab61a1b47..1fa3a3219c45 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
 		else
 			pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
 					      *(u16 *)buf);
-		buf += 2;
+		buf += 4;
 	}
-	len += 2;
+	len += 4;
 
 	/*
 	 * If we have any Low Priority VCs and a VC Arbitration Table Offset
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1b8304e1efaa..140436a046c0 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1010,8 +1010,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 		if (!ret)
 			ret = init_fn(pmu);
 	} else {
-		ret = probe_current_pmu(pmu, probe_table);
 		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
 	}
 
 	if (ret) {
diff --git a/drivers/phy/phy-bcm-ns-usb2.c b/drivers/phy/phy-bcm-ns-usb2.c
index 95ab6b2a0de5..58dff80e9386 100644
--- a/drivers/phy/phy-bcm-ns-usb2.c
+++ b/drivers/phy/phy-bcm-ns-usb2.c
@@ -109,8 +109,8 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev)
 	}
 
 	usb2->phy = devm_phy_create(dev, NULL, &ops);
-	if (IS_ERR(dev))
-		return PTR_ERR(dev);
+	if (IS_ERR(usb2->phy))
+		return PTR_ERR(usb2->phy);
 
 	phy_set_drvdata(usb2->phy, usb2);
 	platform_set_drvdata(pdev, usb2);
diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c
index cc093ebfda94..8b851f718123 100644
--- a/drivers/phy/phy-exynos-mipi-video.c
+++ b/drivers/phy/phy-exynos-mipi-video.c
@@ -233,8 +233,12 @@ static inline int __is_running(const struct exynos_mipi_phy_desc *data,
 			struct exynos_mipi_video_phy *state)
 {
 	u32 val;
+	int ret;
+
+	ret = regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
+	if (ret)
+		return 0;
 
-	regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
 	return val & data->resetn_val;
 }
 
diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c
index 3acd2a1808df..213e2e15339c 100644
--- a/drivers/phy/phy-miphy28lp.c
+++ b/drivers/phy/phy-miphy28lp.c
@@ -1143,7 +1143,8 @@ static int miphy28lp_probe_resets(struct device_node *node,
 	struct miphy28lp_dev *miphy_dev = miphy_phy->phydev;
 	int err;
 
-	miphy_phy->miphy_rst = of_reset_control_get(node, "miphy-sw-rst");
+	miphy_phy->miphy_rst =
+		of_reset_control_get_shared(node, "miphy-sw-rst");
 
 	if (IS_ERR(miphy_phy->miphy_rst)) {
 		dev_err(miphy_dev->dev,
diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c
index 76bb88f0700a..4be3f5dbbc9f 100644
--- a/drivers/phy/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/phy-rcar-gen3-usb2.c
@@ -144,12 +144,6 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch)
 	extcon_set_cable_state_(ch->extcon, EXTCON_USB, true);
 }
 
-static bool rcar_gen3_check_vbus(struct rcar_gen3_chan *ch)
-{
-	return !!(readl(ch->base + USB2_ADPCTRL) &
-		  USB2_ADPCTRL_OTGSESSVLD);
-}
-
 static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 {
 	return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG);
@@ -157,13 +151,7 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 
 static void rcar_gen3_device_recognition(struct rcar_gen3_chan *ch)
 {
-	bool is_host = true;
-
-	/* B-device? */
-	if (rcar_gen3_check_id(ch) && rcar_gen3_check_vbus(ch))
-		is_host = false;
-
-	if (is_host)
+	if (!rcar_gen3_check_id(ch))
 		rcar_gen3_init_for_host(ch);
 	else
 		rcar_gen3_init_for_peri(ch);
diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c
index 793ecb6d87bc..8b267a746576 100644
--- a/drivers/phy/phy-rockchip-dp.c
+++ b/drivers/phy/phy-rockchip-dp.c
@@ -90,7 +90,7 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
-	if (IS_ERR(dp))
+	if (!dp)
 		return -ENOMEM;
 
 	dp->dev = dev;
diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c
index 1d5ae5f8ef69..b1f44ab669fb 100644
--- a/drivers/phy/phy-stih407-usb.c
+++ b/drivers/phy/phy-stih407-usb.c
@@ -105,13 +105,13 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev)
 	phy_dev->dev = dev;
 	dev_set_drvdata(dev, phy_dev);
 
-	phy_dev->rstc = devm_reset_control_get(dev, "global");
+	phy_dev->rstc = devm_reset_control_get_shared(dev, "global");
 	if (IS_ERR(phy_dev->rstc)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstc);
 	}
 
-	phy_dev->rstport = devm_reset_control_get(dev, "port");
+	phy_dev->rstport = devm_reset_control_get_exclusive(dev, "port");
 	if (IS_ERR(phy_dev->rstport)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstport);
diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index bae54f7a1f48..de3101fbbf40 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -175,7 +175,7 @@ static void sun4i_usb_phy_write(struct sun4i_usb_phy *phy, u32 addr, u32 data,
 {
 	struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy);
 	u32 temp, usbc_bit = BIT(phy->index * 2);
-	void *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
+	void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
 	int i;
 
 	mutex_lock(&phy_data->mutex);
@@ -514,9 +514,9 @@ static int sun4i_usb_phy_remove(struct platform_device *pdev)
 
 	if (data->vbus_power_nb_registered)
 		power_supply_unreg_notifier(&data->vbus_power_nb);
-	if (data->id_det_irq >= 0)
+	if (data->id_det_irq > 0)
 		devm_free_irq(dev, data->id_det_irq, data);
-	if (data->vbus_det_irq >= 0)
+	if (data->vbus_det_irq > 0)
 		devm_free_irq(dev, data->vbus_det_irq, data);
 
 	cancel_delayed_work_sync(&data->detect);
@@ -645,11 +645,11 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 
 	data->id_det_irq = gpiod_to_irq(data->id_det_gpio);
 	data->vbus_det_irq = gpiod_to_irq(data->vbus_det_gpio);
-	if ((data->id_det_gpio && data->id_det_irq < 0) ||
-	    (data->vbus_det_gpio && data->vbus_det_irq < 0))
+	if ((data->id_det_gpio && data->id_det_irq <= 0) ||
+	    (data->vbus_det_gpio && data->vbus_det_irq <= 0))
 		data->phy0_poll = true;
 
-	if (data->id_det_irq >= 0) {
+	if (data->id_det_irq > 0) {
 		ret = devm_request_irq(dev, data->id_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
@@ -660,7 +660,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (data->vbus_det_irq >= 0) {
+	if (data->vbus_det_irq > 0) {
 		ret = devm_request_irq(dev, data->vbus_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c
index 0a477d24cf76..bf46844dc387 100644
--- a/drivers/phy/phy-ti-pipe3.c
+++ b/drivers/phy/phy-ti-pipe3.c
@@ -293,11 +293,18 @@ static int ti_pipe3_init(struct phy *x)
 		ret = ti_pipe3_dpll_wait_lock(phy);
 	}
 
-	/* Program the DPLL only if not locked */
+	/* SATA has issues if re-programmed when locked */
 	val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS);
-	if (!(val & PLL_LOCK))
-		if (ti_pipe3_dpll_program(phy))
-			return -EINVAL;
+	if ((val & PLL_LOCK) && of_device_is_compatible(phy->dev->of_node,
+							"ti,phy-pipe3-sata"))
+		return ret;
+
+	/* Program the DPLL */
+	ret = ti_pipe3_dpll_program(phy);
+	if (ret) {
+		ti_pipe3_disable_clocks(phy);
+		return -EINVAL;
+	}
 
 	return ret;
 }
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
index 6b6af6cba454..d9b10a39a2cf 100644
--- a/drivers/phy/phy-twl4030-usb.c
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -463,7 +463,8 @@ static int twl4030_phy_power_on(struct phy *phy)
 	twl4030_usb_set_mode(twl, twl->usb_mode);
 	if (twl->usb_mode == T2_USB_MODE_ULPI)
 		twl4030_i2c_access(twl, 0);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 
 	return 0;
 }
@@ -537,6 +538,7 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 	struct twl4030_usb *twl = _twl;
 	enum musb_vbus_id_status status;
 	bool status_changed = false;
+	int err;
 
 	status = twl4030_usb_linkstat(twl);
 
@@ -567,7 +569,9 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 			pm_runtime_mark_last_busy(twl->dev);
 			pm_runtime_put_autosuspend(twl->dev);
 		}
-		musb_mailbox(status);
+		err = musb_mailbox(status);
+		if (err)
+			twl->linkstat = MUSB_UNKNOWN;
 	}
 
 	/* don't schedule during sleep - irq works right then */
@@ -595,7 +599,8 @@ static int twl4030_phy_init(struct phy *phy)
 	struct twl4030_usb *twl = phy_get_drvdata(phy);
 
 	pm_runtime_get_sync(twl->dev);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 	pm_runtime_mark_last_busy(twl->dev);
 	pm_runtime_put_autosuspend(twl->dev);
 
@@ -763,7 +768,8 @@ static int twl4030_usb_remove(struct platform_device *pdev)
 	if (cable_present(twl->linkstat))
 		pm_runtime_put_noidle(twl->dev);
 	pm_runtime_mark_last_busy(twl->dev);
-	pm_runtime_put_sync_suspend(twl->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(twl->dev);
 	pm_runtime_disable(twl->dev);
 
 	/* autogate 60MHz ULPI clock,
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index e4bc1151e04f..42a5c1dddfef 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PINCTRL_PISTACHIO)	+= pinctrl-pistachio.o
 obj-$(CONFIG_PINCTRL_ROCKCHIP)	+= pinctrl-rockchip.o
 obj-$(CONFIG_PINCTRL_SINGLE)	+= pinctrl-single.o
 obj-$(CONFIG_PINCTRL_SIRF)	+= sirf/
-obj-$(CONFIG_PINCTRL_TEGRA)	+= tegra/
+obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
 obj-$(CONFIG_PINCTRL_TZ1090)	+= pinctrl-tz1090.o
 obj-$(CONFIG_PINCTRL_TZ1090_PDC)	+= pinctrl-tz1090-pdc.o
 obj-$(CONFIG_PINCTRL_U300)	+= pinctrl-u300.o
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index 47ccfcc8a647..eccb47480e1d 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -209,9 +209,9 @@ static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
 		pin_reg = &info->pin_regs[pin_id];
 
 		if (pin_reg->mux_reg == -1) {
-			dev_err(ipctl->dev, "Pin(%s) does not support mux function\n",
+			dev_dbg(ipctl->dev, "Pin(%s) does not support mux function\n",
 				info->pins[pin_id].name);
-			return -EINVAL;
+			continue;
 		}
 
 		if (info->flags & SHARE_MUX_CONF_REG) {
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 677a811b3a6f..7abfd42e8ffd 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -401,9 +401,9 @@ static const struct byt_simple_func_mux byt_score_sata_mux[] = {
 static const unsigned int byt_score_plt_clk0_pins[] = { 96 };
 static const unsigned int byt_score_plt_clk1_pins[] = { 97 };
 static const unsigned int byt_score_plt_clk2_pins[] = { 98 };
-static const unsigned int byt_score_plt_clk4_pins[] = { 99 };
-static const unsigned int byt_score_plt_clk5_pins[] = { 100 };
-static const unsigned int byt_score_plt_clk3_pins[] = { 101 };
+static const unsigned int byt_score_plt_clk3_pins[] = { 99 };
+static const unsigned int byt_score_plt_clk4_pins[] = { 100 };
+static const unsigned int byt_score_plt_clk5_pins[] = { 101 };
 static const struct byt_simple_func_mux byt_score_plt_clk_mux[] = {
 	SIMPLE_FUNC("plt_clk", 1),
 };
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index d5bf9fae2ddd..a87439ee4cdd 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -391,4 +391,12 @@ exit:
 }
 EXPORT_SYMBOL_GPL(pinconf_generic_dt_node_to_map);
 
+void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev,
+				 struct pinctrl_map *map,
+				 unsigned num_maps)
+{
+	pinctrl_utils_free_map(pctldev, map, num_maps);
+}
+EXPORT_SYMBOL_GPL(pinconf_generic_dt_free_map);
+
 #endif
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index cf9bafa10acf..bfdf720db270 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1580,6 +1580,9 @@ static inline void pcs_irq_set(struct pcs_soc_data *pcs_soc,
 		else
 			mask &= ~soc_mask;
 		pcs->write(mask, pcswi->reg);
+
+		/* flush posted write */
+		mask = pcs->read(pcswi->reg);
 		raw_spin_unlock(&pcs->lock);
 	}
 
diff --git a/drivers/pinctrl/tegra/Makefile b/drivers/pinctrl/tegra/Makefile
index a927379b6794..d9ea2be69cc4 100644
--- a/drivers/pinctrl/tegra/Makefile
+++ b/drivers/pinctrl/tegra/Makefile
@@ -1,4 +1,4 @@
-obj-y					+= pinctrl-tegra.o
+obj-$(CONFIG_PINCTRL_TEGRA)		+= pinctrl-tegra.o
 obj-$(CONFIG_PINCTRL_TEGRA20)		+= pinctrl-tegra20.o
 obj-$(CONFIG_PINCTRL_TEGRA30)		+= pinctrl-tegra30.o
 obj-$(CONFIG_PINCTRL_TEGRA114)		+= pinctrl-tegra114.o
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 6d8ee3b15872..8abd80dbcbed 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -151,13 +151,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
 		goto exit;
 	}
 
+	if (u_cmd.outsize != s_cmd->outsize ||
+	    u_cmd.insize != s_cmd->insize) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
 	s_cmd->command += ec->cmd_offset;
 	ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
 	/* Only copy data to userland if data was received. */
 	if (ret < 0)
 		goto exit;
 
-	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize))
+	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
 		ret = -EFAULT;
 exit:
 	kfree(s_cmd);
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index c06bb85c2839..3ec0025d19e7 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -103,7 +103,6 @@ config DELL_SMBIOS
 
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
-	depends on X86
 	depends on DELL_SMBIOS
 	depends on DMI
 	depends on BACKLIGHT_CLASS_DEVICE
@@ -505,7 +504,7 @@ config THINKPAD_ACPI_HOTKEY_POLL
 
 config SENSORS_HDAPS
 	tristate "Thinkpad Hard Drive Active Protection System (hdaps)"
-	depends on INPUT && X86
+	depends on INPUT
 	select INPUT_POLLDEV
 	default n
 	help
@@ -749,7 +748,7 @@ config TOSHIBA_WMI
 
 config ACPI_CMPC
 	tristate "CMPC Laptop Extras"
-	depends on X86 && ACPI
+	depends on ACPI
 	depends on RFKILL || RFKILL=n
 	select INPUT
 	select BACKLIGHT_CLASS_DEVICE
@@ -848,7 +847,7 @@ config INTEL_IMR
 
 config INTEL_PMC_CORE
 	bool "Intel PMC Core driver"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	  The Intel Platform Controller Hub for Intel Core SoCs provides access
 	  to Power Management Controller registers via a PCI interface. This
@@ -860,7 +859,7 @@ config INTEL_PMC_CORE
 
 config IBM_RTL
 	tristate "Device driver to enable PRTL support"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	 Enable support for IBM Premium Real Time Mode (PRTM).
 	 This module will allow you the enter and exit PRTM in the BIOS via
@@ -894,7 +893,6 @@ config XO15_EBOOK
 
 config SAMSUNG_LAPTOP
 	tristate "Samsung Laptop driver"
-	depends on X86
 	depends on RFKILL || RFKILL = n
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 4034d2d4c507..a66be137324c 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -31,19 +31,21 @@
 /**
  * DOC: Overview
  *
- * :1:  http://www.latticesemi.com/en/Products/FPGAandCPLD/LatticeXP2.aspx
- * :2:  http://www.renesas.com/products/mpumcu/h8s/h8s2100/h8s2113/index.jsp
- *
  * gmux is a microcontroller built into the MacBook Pro to support dual GPUs:
- * A {1}[Lattice XP2] on pre-retinas, a {2}[Renesas R4F2113] on retinas.
+ * A `Lattice XP2`_ on pre-retinas, a `Renesas R4F2113`_ on retinas.
  *
  * (The MacPro6,1 2013 also has a gmux, however it is unclear why since it has
  * dual GPUs but no built-in display.)
  *
  * gmux is connected to the LPC bus of the southbridge. Its I/O ports are
  * accessed differently depending on the microcontroller: Driver functions
- * to access a pre-retina gmux are infixed `_pio_`, those for a retina gmux
- * are infixed `_index_`.
+ * to access a pre-retina gmux are infixed ``_pio_``, those for a retina gmux
+ * are infixed ``_index_``.
+ *
+ * .. _Lattice XP2:
+ *     http://www.latticesemi.com/en/Products/FPGAandCPLD/LatticeXP2.aspx
+ * .. _Renesas R4F2113:
+ *     http://www.renesas.com/products/mpumcu/h8s/h8s2100/h8s2113/index.jsp
  */
 
 struct apple_gmux_data {
@@ -272,15 +274,15 @@ static bool gmux_is_indexed(struct apple_gmux_data *gmux_data)
 /**
  * DOC: Backlight control
  *
- * :3:  http://www.ti.com/lit/ds/symlink/lp8543.pdf
- * :4:  http://www.ti.com/lit/ds/symlink/lp8545.pdf
- *
  * On single GPU MacBooks, the PWM signal for the backlight is generated by
  * the GPU. On dual GPU MacBook Pros by contrast, either GPU may be suspended
  * to conserve energy. Hence the PWM signal needs to be generated by a separate
  * backlight driver which is controlled by gmux. The earliest generation
- * MBP5 2008/09 uses a {3}[TI LP8543] backlight driver. All newer models
- * use a {4}[TI LP8545].
+ * MBP5 2008/09 uses a `TI LP8543`_ backlight driver. All newer models
+ * use a `TI LP8545`_.
+ *
+ * .. _TI LP8543: http://www.ti.com/lit/ds/symlink/lp8543.pdf
+ * .. _TI LP8545: http://www.ti.com/lit/ds/symlink/lp8545.pdf
  */
 
 static int gmux_get_brightness(struct backlight_device *bd)
@@ -312,28 +314,20 @@ static const struct backlight_ops gmux_bl_ops = {
 /**
  * DOC: Graphics mux
  *
- * :5:  http://pimg-fpiw.uspto.gov/fdd/07/870/086/0.pdf
- * :6:  http://www.nxp.com/documents/data_sheet/CBTL06141.pdf
- * :7:  http://www.ti.com/lit/ds/symlink/hd3ss212.pdf
- * :8:  https://www.pericom.com/assets/Datasheets/PI3VDP12412.pdf
- * :9:  http://www.ti.com/lit/ds/symlink/sn74lv4066a.pdf
- * :10: http://pdf.datasheetarchive.com/indexerfiles/Datasheets-SW16/DSASW00308511.pdf
- * :11: http://www.ti.com/lit/ds/symlink/ts3ds10224.pdf
- *
  * On pre-retinas, the LVDS outputs of both GPUs feed into gmux which muxes
  * either of them to the panel. One of the tricks gmux has up its sleeve is
  * to lengthen the blanking interval of its output during a switch to
  * synchronize it with the GPU switched to. This allows for a flicker-free
- * switch that is imperceptible by the user ({5}[US 8,687,007 B2]).
+ * switch that is imperceptible by the user (`US 8,687,007 B2`_).
  *
  * On retinas, muxing is no longer done by gmux itself, but by a separate
  * chip which is controlled by gmux. The chip is triple sourced, it is
- * either an {6}[NXP CBTL06142], {7}[TI HD3SS212] or {8}[Pericom PI3VDP12412].
+ * either an `NXP CBTL06142`_, `TI HD3SS212`_ or `Pericom PI3VDP12412`_.
  * The panel is driven with eDP instead of LVDS since the pixel clock
  * required for retina resolution exceeds LVDS' limits.
  *
  * Pre-retinas are able to switch the panel's DDC pins separately.
- * This is handled by a {9}[TI SN74LV4066A] which is controlled by gmux.
+ * This is handled by a `TI SN74LV4066A`_ which is controlled by gmux.
  * The inactive GPU can thus probe the panel's EDID without switching over
  * the entire panel. Retinas lack this functionality as the chips used for
  * eDP muxing are incapable of switching the AUX channel separately (see
@@ -344,15 +338,15 @@ static const struct backlight_ops gmux_bl_ops = {
  *
  * The external DP port is only fully switchable on the first two unibody
  * MacBook Pro generations, MBP5 2008/09 and MBP6 2010. This is done by an
- * {6}[NXP CBTL06141] which is controlled by gmux. It's the predecessor of the
+ * `NXP CBTL06141`_ which is controlled by gmux. It's the predecessor of the
  * eDP mux on retinas, the difference being support for 2.7 versus 5.4 Gbit/s.
  *
  * The following MacBook Pro generations replaced the external DP port with a
  * combined DP/Thunderbolt port and lost the ability to switch it between GPUs,
  * connecting it either to the discrete GPU or the Thunderbolt controller.
  * Oddly enough, while the full port is no longer switchable, AUX and HPD
- * are still switchable by way of an {10}[NXP CBTL03062] (on pre-retinas
- * MBP8 2011 and MBP9 2012) or two {11}[TI TS3DS10224] (on retinas) under the
+ * are still switchable by way of an `NXP CBTL03062`_ (on pre-retinas
+ * MBP8 2011 and MBP9 2012) or two `TI TS3DS10224`_ (on retinas) under the
  * control of gmux. Since the integrated GPU is missing the main link,
  * external displays appear to it as phantoms which fail to link-train.
  *
@@ -365,10 +359,19 @@ static const struct backlight_ops gmux_bl_ops = {
  * of this feature.
  *
  * gmux' initial switch state on bootup is user configurable via the EFI
- * variable `gpu-power-prefs-fa4ce28d-b62f-4c99-9cc3-6815686e30f9` (5th byte,
+ * variable ``gpu-power-prefs-fa4ce28d-b62f-4c99-9cc3-6815686e30f9`` (5th byte,
  * 1 = IGD, 0 = DIS). Based on this setting, the EFI firmware tells gmux to
  * switch the panel and the external DP connector and allocates a framebuffer
  * for the selected GPU.
+ *
+ * .. _US 8,687,007 B2: http://pimg-fpiw.uspto.gov/fdd/07/870/086/0.pdf
+ * .. _NXP CBTL06141:   http://www.nxp.com/documents/data_sheet/CBTL06141.pdf
+ * .. _NXP CBTL06142:   http://www.nxp.com/documents/data_sheet/CBTL06141.pdf
+ * .. _TI HD3SS212:     http://www.ti.com/lit/ds/symlink/hd3ss212.pdf
+ * .. _Pericom PI3VDP12412: https://www.pericom.com/assets/Datasheets/PI3VDP12412.pdf
+ * .. _TI SN74LV4066A:  http://www.ti.com/lit/ds/symlink/sn74lv4066a.pdf
+ * .. _NXP CBTL03062:   http://pdf.datasheetarchive.com/indexerfiles/Datasheets-SW16/DSASW00308511.pdf
+ * .. _TI TS3DS10224:   http://www.ti.com/lit/ds/symlink/ts3ds10224.pdf
  */
 
 static void gmux_read_switch_state(struct apple_gmux_data *gmux_data)
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 4a23fbc66b71..d1a091b93192 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -567,6 +567,7 @@ static void ideapad_sysfs_exit(struct ideapad_private *priv)
 static const struct key_entry ideapad_keymap[] = {
 	{ KE_KEY, 6,  { KEY_SWITCHVIDEOMODE } },
 	{ KE_KEY, 7,  { KEY_CAMERA } },
+	{ KE_KEY, 8,  { KEY_MICMUTE } },
 	{ KE_KEY, 11, { KEY_F16 } },
 	{ KE_KEY, 13, { KEY_WLAN } },
 	{ KE_KEY, 16, { KEY_PROG1 } },
@@ -809,6 +810,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
 				break;
 			case 13:
 			case 11:
+			case 8:
 			case 7:
 			case 6:
 				ideapad_input_report(priv, vpc_bit);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c3bfa1fe95bf..b65ce7519411 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2043,6 +2043,7 @@ static int hotkey_autosleep_ack;
 
 static u32 hotkey_orig_mask;		/* events the BIOS had enabled */
 static u32 hotkey_all_mask;		/* all events supported in fw */
+static u32 hotkey_adaptive_all_mask;	/* all adaptive events supported in fw */
 static u32 hotkey_reserved_mask;	/* events better left disabled */
 static u32 hotkey_driver_mask;		/* events needed by the driver */
 static u32 hotkey_user_mask;		/* events visible to userspace */
@@ -2742,6 +2743,17 @@ static ssize_t hotkey_all_mask_show(struct device *dev,
 
 static DEVICE_ATTR_RO(hotkey_all_mask);
 
+/* sysfs hotkey all_mask ----------------------------------------------- */
+static ssize_t hotkey_adaptive_all_mask_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+			hotkey_adaptive_all_mask | hotkey_source_mask);
+}
+
+static DEVICE_ATTR_RO(hotkey_adaptive_all_mask);
+
 /* sysfs hotkey recommended_mask --------------------------------------- */
 static ssize_t hotkey_recommended_mask_show(struct device *dev,
 					    struct device_attribute *attr,
@@ -2985,6 +2997,7 @@ static struct attribute *hotkey_attributes[] __initdata = {
 	&dev_attr_wakeup_hotunplug_complete.attr,
 	&dev_attr_hotkey_mask.attr,
 	&dev_attr_hotkey_all_mask.attr,
+	&dev_attr_hotkey_adaptive_all_mask.attr,
 	&dev_attr_hotkey_recommended_mask.attr,
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
 	&dev_attr_hotkey_source_mask.attr,
@@ -3321,20 +3334,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	if (!tp_features.hotkey)
 		return 1;
 
-	/*
-	 * Check if we have an adaptive keyboard, like on the
-	 * Lenovo Carbon X1 2014 (2nd Gen).
-	 */
-	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) == 2) {
-			tp_features.has_adaptive_kbd = true;
-			res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-					&adaptive_kbd_attr_group);
-			if (res)
-				goto err_exit;
-		}
-	}
-
 	quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable,
 				     ARRAY_SIZE(tpacpi_hotkey_qtable));
 
@@ -3357,30 +3356,70 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	   A30, R30, R31, T20-22, X20-21, X22-24.  Detected by checking
 	   for HKEY interface version 0x100 */
 	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) != 1) {
-			pr_err("unknown version of the HKEY interface: 0x%x\n",
-			       hkeyv);
-			pr_err("please report this to %s\n", TPACPI_MAIL);
-		} else {
+		vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
+			    "firmware HKEY interface version: 0x%x\n",
+			    hkeyv);
+
+		switch (hkeyv >> 8) {
+		case 1:
 			/*
 			 * MHKV 0x100 in A31, R40, R40e,
 			 * T4x, X31, and later
 			 */
-			vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
-				"firmware HKEY interface version: 0x%x\n",
-				hkeyv);
 
 			/* Paranoia check AND init hotkey_all_mask */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
 					"MHKA", "qd")) {
-				pr_err("missing MHKA handler, "
-				       "please report this to %s\n",
+				pr_err("missing MHKA handler, please report this to %s\n",
 				       TPACPI_MAIL);
 				/* Fallback: pre-init for FN+F3,F4,F12 */
 				hotkey_all_mask = 0x080cU;
 			} else {
 				tp_features.hotkey_mask = 1;
 			}
+			break;
+
+		case 2:
+			/*
+			 * MHKV 0x200 in X1, T460s, X260, T560, X1 Tablet (2016)
+			 */
+
+			/* Paranoia check AND init hotkey_all_mask */
+			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
+					"MHKA", "dd", 1)) {
+				pr_err("missing MHKA handler, please report this to %s\n",
+				       TPACPI_MAIL);
+				/* Fallback: pre-init for FN+F3,F4,F12 */
+				hotkey_all_mask = 0x080cU;
+			} else {
+				tp_features.hotkey_mask = 1;
+			}
+
+			/*
+			 * Check if we have an adaptive keyboard, like on the
+			 * Lenovo Carbon X1 2014 (2nd Gen).
+			 */
+			if (acpi_evalf(hkey_handle, &hotkey_adaptive_all_mask,
+				       "MHKA", "dd", 2)) {
+				if (hotkey_adaptive_all_mask != 0) {
+					tp_features.has_adaptive_kbd = true;
+					res = sysfs_create_group(
+						&tpacpi_pdev->dev.kobj,
+						&adaptive_kbd_attr_group);
+					if (res)
+						goto err_exit;
+				}
+			} else {
+				tp_features.has_adaptive_kbd = false;
+				hotkey_adaptive_all_mask = 0x0U;
+			}
+			break;
+
+		default:
+			pr_err("unknown version of the HKEY interface: 0x%x\n",
+			       hkeyv);
+			pr_err("please report this to %s\n", TPACPI_MAIL);
+			break;
 		}
 	}
 
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 456987c88baa..b13cd074c52a 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -565,11 +565,12 @@ static int power_supply_read_temp(struct thermal_zone_device *tzd,
 
 	WARN_ON(tzd == NULL);
 	psy = tzd->devdata;
-	ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	if (ret)
+		return ret;
 
 	/* Convert tenths of degree Celsius to milli degree Celsius. */
-	if (!ret)
-		*temp = val.intval * 100;
+	*temp = val.intval * 100;
 
 	return ret;
 }
@@ -612,10 +613,12 @@ static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
@@ -628,10 +631,12 @@ static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index d9f56730c735..73dfae41def8 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -197,6 +197,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 {
 	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
 	struct tps65217_charger *charger;
+	struct power_supply_config cfg = {};
 	int ret;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
@@ -208,9 +209,12 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 	charger->tps = tps;
 	charger->dev = &pdev->dev;
 
+	cfg.of_node = pdev->dev.of_node;
+	cfg.drv_data = charger;
+
 	charger->ac = devm_power_supply_register(&pdev->dev,
 						 &tps65217_charger_desc,
-						 NULL);
+						 &cfg);
 	if (IS_ERR(charger->ac)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
 		return PTR_ERR(charger->ac);
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index 38a8bbe74810..83797d89c30f 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -195,7 +195,7 @@ static void parport_detach(struct parport *port)
 	struct pps_client_pp *device;
 
 	/* FIXME: oooh, this is ugly! */
-	if (strcmp(pardev->name, KBUILD_MODNAME))
+	if (!pardev || strcmp(pardev->name, KBUILD_MODNAME))
 		/* not our port */
 		return;
 
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dba3843c53b8..ed337a8c34ab 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -457,7 +457,8 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 {
 	int err;
 
-	if (!pwm)
+	if (!pwm || !state || !state->period ||
+	    state->duty_cycle > state->period)
 		return -EINVAL;
 
 	if (!memcmp(state, &pwm->state, sizeof(*state)))
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index f994c7eaf41c..14fc011faa32 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -272,7 +272,7 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 	chip->chip.of_pwm_n_cells = 3;
 	chip->chip.can_sleep = 1;
 
-	ret = pwmchip_add(&chip->chip);
+	ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED);
 	if (ret) {
 		clk_disable_unprepare(hlcdc->periph_clk);
 		return ret;
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index d98599249a05..01695d48dd54 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -152,7 +152,7 @@ static ssize_t enable_store(struct device *child,
 		goto unlock;
 	}
 
-	pwm_apply_state(pwm, &state);
+	ret = pwm_apply_state(pwm, &state);
 
 unlock:
 	mutex_unlock(&export->lock);
diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c
index 63cd5e68c864..3a6d0290c54c 100644
--- a/drivers/regulator/anatop-regulator.c
+++ b/drivers/regulator/anatop-regulator.c
@@ -296,7 +296,7 @@ static int anatop_regulator_probe(struct platform_device *pdev)
 		if (!sreg->sel && !strcmp(sreg->name, "vddpu"))
 			sreg->sel = 22;
 
-		if (!sreg->sel) {
+		if (!sreg->bypass && !sreg->sel) {
 			dev_err(&pdev->dev, "Failed to read a valid default voltage selector.\n");
 			return -EINVAL;
 		}
diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 321e804aeab0..a1b49a6d538f 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -123,6 +123,9 @@ static int max77620_regulator_set_fps_src(struct max77620_regulator *pmic,
 	unsigned int val;
 	int ret;
 
+	if (!rinfo)
+		return 0;
+
 	switch (fps_src) {
 	case MAX77620_FPS_SRC_0:
 	case MAX77620_FPS_SRC_1:
@@ -171,6 +174,9 @@ static int max77620_regulator_set_fps_slots(struct max77620_regulator *pmic,
 	int pd = rpdata->active_fps_pd_slot;
 	int ret = 0;
 
+	if (!rinfo)
+		return 0;
+
 	if (is_suspend) {
 		pu = rpdata->suspend_fps_pu_slot;
 		pd = rpdata->suspend_fps_pd_slot;
@@ -680,7 +686,6 @@ static struct max77620_regulator_info max77620_regs_info[MAX77620_NUM_REGS] = {
 	RAIL_SD(SD1, sd1, "in-sd1", SD1, 600000, 1550000, 12500, 0x22, SD1),
 	RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 	RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE),
-	RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 
 	RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000),
 	RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000),
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 56a17ec5b5ef..6c7fe4778793 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -140,6 +140,18 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
 	.enable = rpm_reg_enable,
 	.disable = rpm_reg_disable,
 	.is_enabled = rpm_reg_is_enabled,
+	.list_voltage = regulator_list_voltage_linear_range,
+
+	.get_voltage = rpm_reg_get_voltage,
+	.set_voltage = rpm_reg_set_voltage,
+
+	.set_load = rpm_reg_set_load,
+};
+
+static const struct regulator_ops rpm_smps_ldo_ops_fixed = {
+	.enable = rpm_reg_enable,
+	.disable = rpm_reg_disable,
+	.is_enabled = rpm_reg_is_enabled,
 
 	.get_voltage = rpm_reg_get_voltage,
 	.set_voltage = rpm_reg_set_voltage,
@@ -247,7 +259,7 @@ static const struct regulator_desc pm8941_nldo = {
 static const struct regulator_desc pm8941_lnldo = {
 	.fixed_uV = 1740000,
 	.n_voltages = 1,
-	.ops = &rpm_smps_ldo_ops,
+	.ops = &rpm_smps_ldo_ops_fixed,
 };
 
 static const struct regulator_desc pm8941_switch = {
diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c
index 572816e30095..c139890c1514 100644
--- a/drivers/regulator/tps51632-regulator.c
+++ b/drivers/regulator/tps51632-regulator.c
@@ -94,11 +94,14 @@ static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev,
 		int ramp_delay)
 {
 	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
-	int bit = ramp_delay/6000;
+	int bit;
 	int ret;
 
-	if (bit)
-		bit--;
+	if (ramp_delay == 0)
+		bit = 0;
+	else
+		bit = DIV_ROUND_UP(ramp_delay, 6000) - 1;
+
 	ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit));
 	if (ret < 0)
 		dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 80b1979e8d95..df036b872b05 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1051,6 +1051,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 		qeth_l2_set_offline(cgdev);
 
 	if (card->dev) {
+		netif_napi_del(&card->napi);
 		unregister_netdev(card->dev);
 		card->dev = NULL;
 	}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ac544330daeb..709b52339ff9 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3226,6 +3226,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 		qeth_l3_set_offline(cgdev);
 
 	if (card->dev) {
+		netif_napi_del(&card->napi);
 		unregister_netdev(card->dev);
 		card->dev = NULL;
 	}
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index d4c285688ce9..3ddc85e6efd6 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 		} else {
 			struct scsi_cmnd *SCp;
 
-			SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG);
+			SCp = SDp->current_cmnd;
 			if(unlikely(SCp == NULL)) {
 				sdev_printk(KERN_ERR, SDp,
 					"no saved request for untagged cmd\n");
@@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 		       slot->tag, slot);
 	} else {
 		slot->tag = SCSI_NO_TAG;
-		/* must populate current_cmnd for scsi_host_find_tag to work */
+		/* save current command for reselection */
 		SCp->device->current_cmnd = SCp;
 	}
 	/* sanity check: some of the commands generated by the mid-layer
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d6a691e27d33..d6803a9e5ab8 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -10093,6 +10093,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
 		ioa_cfg->intr_flag = IPR_USE_MSI;
 	else {
 		ioa_cfg->intr_flag = IPR_USE_LSI;
+		ioa_cfg->clear_isr = 1;
 		ioa_cfg->nvectors = 1;
 		dev_info(&pdev->dev, "Cannot enable MSI.\n");
 	}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 5649c200d37c..a92a62dea793 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2548,7 +2548,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
 	if (!vha->flags.online)
 		return;
 
-	if (rsp->msix->cpuid != smp_processor_id()) {
+	if (rsp->msix && rsp->msix->cpuid != smp_processor_id()) {
 		/* if kernel does not notify qla of IRQ's CPU change,
 		 * then set it here.
 		 */
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 3408578b08d6..eaccd651ccda 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -230,6 +230,7 @@ static struct {
 	{"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
 	{"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
 	{"Promise", "", NULL, BLIST_SPARSELUN},
+	{"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES},
 	{"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
@@ -428,7 +429,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
 	 * here, and we don't know what device it is
 	 * trying to work with, leave it as-is.
 	 */
-	vmax = 8;	/* max length of vendor */
+	vmax = sizeof(devinfo->vendor);
 	vskip = vendor;
 	while (vmax > 0 && *vskip == ' ') {
 		vmax--;
@@ -438,7 +439,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
 	while (vmax > 0 && vskip[vmax - 1] == ' ')
 		--vmax;
 
-	mmax = 16;	/* max length of model */
+	mmax = sizeof(devinfo->model);
 	mskip = model;
 	while (mmax > 0 && *mskip == ' ') {
 		mmax--;
@@ -454,10 +455,12 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
 			 * Behave like the older version of get_device_flags.
 			 */
 			if (memcmp(devinfo->vendor, vskip, vmax) ||
-					devinfo->vendor[vmax])
+					(vmax < sizeof(devinfo->vendor) &&
+						devinfo->vendor[vmax]))
 				continue;
 			if (memcmp(devinfo->model, mskip, mmax) ||
-					devinfo->model[mmax])
+					(mmax < sizeof(devinfo->model) &&
+						devinfo->model[mmax]))
 				continue;
 			return devinfo;
 		} else {
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a8b610eaa0ca..106a6adbd6f1 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1128,7 +1128,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
  */
 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 {
-	scmd->device->host->host_failed--;
 	scmd->eh_eflags = 0;
 	list_move_tail(&scmd->eh_entry, done_q);
 }
@@ -2227,6 +2226,9 @@ int scsi_error_handler(void *data)
 		else
 			scsi_unjam_host(shost);
 
+		/* All scmds have been handled */
+		shost->host_failed = 0;
+
 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f459dff30512..60bff78e9ead 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2867,10 +2867,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	if (sdkp->opt_xfer_blocks &&
 	    sdkp->opt_xfer_blocks <= dev_max &&
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
-		rw_max = q->limits.io_opt =
-			sdkp->opt_xfer_blocks * sdp->sector_size;
-	else
+	    logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) {
+		q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
+		rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+	} else
 		rw_max = BLK_DEF_MAX_SECTORS;
 
 	/* Combine with controller limits */
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 654630bb7d0e..765a6f1ac1b7 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -151,6 +151,11 @@ static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blo
 	return blocks << (ilog2(sdev->sector_size) - 9);
 }
 
+static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks)
+{
+	return blocks * sdev->sector_size;
+}
+
 /*
  * A DIF-capable target device can be formatted with different
  * protection schemes.  Currently 0 through 3 are defined:
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index cd89682065b9..1026e180eed7 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one(
 		struct spi_device *spi,
 		struct spi_transfer *xfer)
 {
-	int ret = 1;
+	int ret = 0;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
@@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one(
 			spi_enable_chip(rs, 1);
 			ret = rockchip_spi_prepare_dma(rs);
 		}
+		/* successful DMA prepare means the transfer is in progress */
+		ret = ret ? ret : 1;
 	} else {
 		spi_enable_chip(rs, 1);
 		ret = rockchip_spi_pio_transfer(rs);
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 1ddd9e2309b6..cf007f3b83ec 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -173,13 +173,17 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun4i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
 
 	/* We don't support transfer larger than the FIFO */
 	if (tfr->len > SUN4I_FIFO_DEPTH)
-		return -EINVAL;
+		return -EMSGSIZE;
+
+	if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH)
+		return -EMSGSIZE;
 
 	reinit_completion(&sspi->done);
 	sspi->tx_buf = tfr->tx_buf;
@@ -269,8 +273,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len));
 	sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len));
 
-	/* Fill the TX FIFO */
-	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH);
+	/*
+	 * Fill the TX FIFO
+	 * Filling the FIFO fully causes timeout for some reason
+	 * at least on spi2 on A10s
+	 */
+	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1);
 
 	/* Enable the interrupts */
 	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC);
@@ -279,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
 	sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 42e2c4bd690a..7fce79a60608 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun6i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
@@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
 	sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 443f664534e1..29ea8d2f9824 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -646,6 +646,13 @@ free_master:
 
 static int ti_qspi_remove(struct platform_device *pdev)
 {
+	struct ti_qspi *qspi = platform_get_drvdata(pdev);
+	int rc;
+
+	rc = spi_master_suspend(qspi->master);
+	if (rc)
+		return rc;
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c
index a8f533af9eca..ec12181822e6 100644
--- a/drivers/staging/iio/accel/sca3000_core.c
+++ b/drivers/staging/iio/accel/sca3000_core.c
@@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev,
 		goto error_ret_mut;
 	ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL);
 	mutex_unlock(&st->lock);
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	val = ret;
 	if (base_freq > 0)
diff --git a/drivers/staging/iio/adc/ad7606_spi.c b/drivers/staging/iio/adc/ad7606_spi.c
index 825da0769936..9587fa86dc69 100644
--- a/drivers/staging/iio/adc/ad7606_spi.c
+++ b/drivers/staging/iio/adc/ad7606_spi.c
@@ -21,7 +21,7 @@ static int ad7606_spi_read_block(struct device *dev,
 {
 	struct spi_device *spi = to_spi_device(dev);
 	int i, ret;
-	unsigned short *data;
+	unsigned short *data = buf;
 	__be16 *bdata = buf;
 
 	ret = spi_read(spi, buf, count * 2);
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 9f43976f4ef2..170ac980abcb 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -444,10 +444,10 @@ static ssize_t ad5933_store(struct device *dev,
 		st->settling_cycles = val;
 
 		/* 2x, 4x handling, see datasheet */
-		if (val > 511)
-			val = (val >> 1) | (1 << 9);
-		else if (val > 1022)
+		if (val > 1022)
 			val = (val >> 2) | (3 << 9);
+		else if (val > 511)
+			val = (val >> 1) | (1 << 9);
 
 		dat = cpu_to_be16(val);
 		ret = ad5933_i2c_write(st->client,
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index bbfee53cfcf5..845e49a52430 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -2521,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	return 0;
 
  failed:
-	if (ni)
+	if (ni) {
 		lnet_ni_decref(ni);
+		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
+		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+	}
 
 	rej.ibr_version             = version;
-	rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-	rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
 	kiblnd_reject(cmid, &rej);
 
 	return -ECONNREFUSED;
diff --git a/drivers/staging/rtl8188eu/core/rtw_efuse.c b/drivers/staging/rtl8188eu/core/rtw_efuse.c
index c17870cddb5b..fbce1f7e68ca 100644
--- a/drivers/staging/rtl8188eu/core/rtw_efuse.c
+++ b/drivers/staging/rtl8188eu/core/rtw_efuse.c
@@ -102,7 +102,7 @@ efuse_phymap_to_logical(u8 *phymap, u16 _offset, u16 _size_byte, u8  *pbuf)
 	if (!efuseTbl)
 		return;
 
-	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(*eFuseWord));
+	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(u16));
 	if (!eFuseWord) {
 		DBG_88E("%s: alloc eFuseWord fail!\n", __func__);
 		goto eFuseWord_failed;
diff --git a/drivers/staging/rtl8188eu/hal/usb_halinit.c b/drivers/staging/rtl8188eu/hal/usb_halinit.c
index 87ea3b844951..363f3a34ddce 100644
--- a/drivers/staging/rtl8188eu/hal/usb_halinit.c
+++ b/drivers/staging/rtl8188eu/hal/usb_halinit.c
@@ -2072,7 +2072,8 @@ void rtl8188eu_set_hal_ops(struct adapter *adapt)
 {
 	struct hal_ops	*halfunc = &adapt->HalFunc;
 
-	adapt->HalData = kzalloc(sizeof(*adapt->HalData), GFP_KERNEL);
+
+	adapt->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL);
 	if (!adapt->HalData)
 		DBG_88E("cant not alloc memory for HAL DATA\n");
 
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 6ceac4f2d4b2..5b4b47ed948b 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -857,14 +857,6 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_power_table;
 	}
 
-	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_dev->id);
-
-	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-						      &cpufreq_cooling_ops);
-	if (IS_ERR(cool_dev))
-		goto remove_idr;
-
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
 		freq = find_next_max(table, freq);
@@ -877,6 +869,14 @@ __cpufreq_cooling_register(struct device_node *np,
 			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
 
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+		 cpufreq_dev->id);
+
+	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+						      &cpufreq_cooling_ops);
+	if (IS_ERR(cool_dev))
+		goto remove_idr;
+
 	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
 	cpufreq_dev->cool_dev = cool_dev;
 
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index f856c4544eea..51e0d32883ba 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 		fsi = tty->driver_data;
 	else
 		fsi = tty->link->driver_data;
-	devpts_kill_index(fsi, tty->index);
-	devpts_release(fsi);
+
+	if (fsi) {
+		devpts_kill_index(fsi, tty->index);
+		devpts_release(fsi);
+	}
 }
 
 static const struct tty_operations ptm_unix98_ops = {
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index f973bfce5d08..1e93a37e27f0 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -366,34 +366,22 @@ static void to_utf8(struct vc_data *vc, uint c)
 
 static void do_compute_shiftstate(void)
 {
-	unsigned int i, j, k, sym, val;
+	unsigned int k, sym, val;
 
 	shift_state = 0;
 	memset(shift_down, 0, sizeof(shift_down));
 
-	for (i = 0; i < ARRAY_SIZE(key_down); i++) {
-
-		if (!key_down[i])
+	for_each_set_bit(k, key_down, min(NR_KEYS, KEY_CNT)) {
+		sym = U(key_maps[0][k]);
+		if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
 			continue;
 
-		k = i * BITS_PER_LONG;
-
-		for (j = 0; j < BITS_PER_LONG; j++, k++) {
-
-			if (!test_bit(k, key_down))
-				continue;
+		val = KVAL(sym);
+		if (val == KVAL(K_CAPSSHIFT))
+			val = KVAL(K_SHIFT);
 
-			sym = U(key_maps[0][k]);
-			if (KTYP(sym) != KT_SHIFT && KTYP(sym) != KT_SLOCK)
-				continue;
-
-			val = KVAL(sym);
-			if (val == KVAL(K_CAPSSHIFT))
-				val = KVAL(K_SHIFT);
-
-			shift_down[val]++;
-			shift_state |= (1 << val);
-		}
+		shift_down[val]++;
+		shift_state |= BIT(val);
 	}
 }
 
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index dc125322f48f..5b0fe97c46ca 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init)
 	vc->vc_complement_mask = 0;
 	vc->vc_can_do_color = 0;
 	vc->vc_panic_force_write = false;
+	vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;
 	vc->vc_sw->con_init(vc, init);
 	if (!vc->vc_complement_mask)
 		vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c
index 9059b7dc185e..2f537bbdda09 100644
--- a/drivers/usb/common/usb-otg-fsm.c
+++ b/drivers/usb/common/usb-otg-fsm.c
@@ -21,6 +21,7 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
@@ -450,3 +451,4 @@ int otg_statemachine(struct otg_fsm *fsm)
 	return fsm->state_changed;
 }
 EXPORT_SYMBOL_GPL(otg_statemachine);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 34b837ae1ed7..d2e3f655c26f 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2598,26 +2598,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd);
  * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is
  * deallocated.
  *
- * Make sure to only deallocate the bandwidth_mutex when the primary HCD is
- * freed.  When hcd_release() is called for either hcd in a peer set
- * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to
- * block new peering attempts
+ * Make sure to deallocate the bandwidth_mutex only when the last HCD is
+ * freed.  When hcd_release() is called for either hcd in a peer set,
+ * invalidate the peer's ->shared_hcd and ->primary_hcd pointers.
  */
 static void hcd_release(struct kref *kref)
 {
 	struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
 
 	mutex_lock(&usb_port_peer_mutex);
-	if (usb_hcd_is_primary_hcd(hcd)) {
-		kfree(hcd->address0_mutex);
-		kfree(hcd->bandwidth_mutex);
-	}
 	if (hcd->shared_hcd) {
 		struct usb_hcd *peer = hcd->shared_hcd;
 
 		peer->shared_hcd = NULL;
-		if (peer->primary_hcd == hcd)
-			peer->primary_hcd = NULL;
+		peer->primary_hcd = NULL;
+	} else {
+		kfree(hcd->address0_mutex);
+		kfree(hcd->bandwidth_mutex);
 	}
 	mutex_unlock(&usb_port_peer_mutex);
 	kfree(hcd);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6dc810bce295..944a6dca0fcb 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Creative SB Audigy 2 NX */
 	{ USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* USB3503 */
+	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Microsoft Wireless Laser Mouse 6000 Receiver */
 	{ USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* MAYA44USB sound device */
 	{ USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* ASUS Base Station(T100) */
+	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
 	/* Action Semiconductor flash disk */
 	{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
 			USB_QUIRK_STRING_FETCH_255 },
@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1908, 0x1315), .driver_info =
 			USB_QUIRK_HONOR_BNUMINTERFACES },
 
-	/* INTEL VALUE SSD */
-	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* USB3503 */
-	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* ASUS Base Station(T100) */
-	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
 	/* Protocol and OTG Electrical Test Device */
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Acer C120 LED Projector */
+	{ USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* Blackmagic Design Intensity Shuttle */
 	{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
 
 	/* Blackmagic Design UltraStudio SDI */
 	{ USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
 
+	/* INTEL VALUE SSD */
+	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 3c58d633ce80..dec0b21fc626 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -64,6 +64,17 @@
 	DWC2_TRACE_SCHEDULER_VB(pr_fmt("%s: SCH: " fmt),		\
 				dev_name(hsotg->dev), ##__VA_ARGS__)
 
+#ifdef CONFIG_MIPS
+/*
+ * There are some MIPS machines that can run in either big-endian
+ * or little-endian mode and that use the dwc2 register without
+ * a byteswap in both ways.
+ * Unlike other architectures, MIPS apparently does not require a
+ * barrier before the __raw_writel() to synchronize with DMA but does
+ * require the barrier after the __raw_writel() to serialize a set of
+ * writes. This set of operations was added specifically for MIPS and
+ * should only be used there.
+ */
 static inline u32 dwc2_readl(const void __iomem *addr)
 {
 	u32 value = __raw_readl(addr);
@@ -90,6 +101,22 @@ static inline void dwc2_writel(u32 value, void __iomem *addr)
 	pr_info("INFO:: wrote %08x to %p\n", value, addr);
 #endif
 }
+#else
+/* Normal architectures just use readl/write */
+static inline u32 dwc2_readl(const void __iomem *addr)
+{
+	return readl(addr);
+}
+
+static inline void dwc2_writel(u32 value, void __iomem *addr)
+{
+	writel(value, addr);
+
+#ifdef DWC2_LOG_WRITES
+	pr_info("info:: wrote %08x to %p\n", value, addr);
+#endif
+}
+#endif
 
 /* Maximum number of Endpoints/HostChannels */
 #define MAX_EPS_CHANNELS	16
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 4c5e3005e1dc..26cf09d0fe3c 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1018,7 +1018,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg,
 	return 1;
 }
 
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value);
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now);
 
 /**
  * get_ep_head - return the first request on the endpoint
@@ -1094,7 +1094,7 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg,
 		case USB_ENDPOINT_HALT:
 			halted = ep->halted;
 
-			dwc2_hsotg_ep_sethalt(&ep->ep, set);
+			dwc2_hsotg_ep_sethalt(&ep->ep, set, true);
 
 			ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0);
 			if (ret) {
@@ -2948,8 +2948,13 @@ static int dwc2_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
  * dwc2_hsotg_ep_sethalt - set halt on a given endpoint
  * @ep: The endpoint to set halt.
  * @value: Set or unset the halt.
+ * @now: If true, stall the endpoint now. Otherwise return -EAGAIN if
+ *       the endpoint is busy processing requests.
+ *
+ * We need to stall the endpoint immediately if request comes from set_feature
+ * protocol command handler.
  */
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now)
 {
 	struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
 	struct dwc2_hsotg *hs = hs_ep->parent;
@@ -2969,6 +2974,17 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
 		return 0;
 	}
 
+	if (hs_ep->isochronous) {
+		dev_err(hs->dev, "%s is Isochronous Endpoint\n", ep->name);
+		return -EINVAL;
+	}
+
+	if (!now && value && !list_empty(&hs_ep->queue)) {
+		dev_dbg(hs->dev, "%s request is pending, cannot halt\n",
+			ep->name);
+		return -EAGAIN;
+	}
+
 	if (hs_ep->dir_in) {
 		epreg = DIEPCTL(index);
 		epctl = dwc2_readl(hs->regs + epreg);
@@ -3020,7 +3036,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
 	int ret = 0;
 
 	spin_lock_irqsave(&hs->lock, flags);
-	ret = dwc2_hsotg_ep_sethalt(ep, value);
+	ret = dwc2_hsotg_ep_sethalt(ep, value, false);
 	spin_unlock_irqrestore(&hs->lock, flags);
 
 	return ret;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 7ddf9449a063..654050684f4f 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -402,6 +402,7 @@
 #define DWC3_DEPCMD_GET_RSC_IDX(x)	(((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
 #define DWC3_DEPCMD_STATUS(x)		(((x) >> 12) & 0x0F)
 #define DWC3_DEPCMD_HIPRI_FORCERM	(1 << 11)
+#define DWC3_DEPCMD_CLEARPENDIN		(1 << 11)
 #define DWC3_DEPCMD_CMDACT		(1 << 10)
 #define DWC3_DEPCMD_CMDIOC		(1 << 8)
 
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index dd5cb5577dca..2f1fb7e7aa54 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -128,12 +128,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, exynos);
 
-	ret = dwc3_exynos_register_phys(exynos);
-	if (ret) {
-		dev_err(dev, "couldn't register PHYs\n");
-		return ret;
-	}
-
 	exynos->dev	= dev;
 
 	exynos->clk = devm_clk_get(dev, "usbdrd30");
@@ -183,20 +177,29 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 		goto err3;
 	}
 
+	ret = dwc3_exynos_register_phys(exynos);
+	if (ret) {
+		dev_err(dev, "couldn't register PHYs\n");
+		goto err4;
+	}
+
 	if (node) {
 		ret = of_platform_populate(node, NULL, NULL, dev);
 		if (ret) {
 			dev_err(dev, "failed to add dwc3 core\n");
-			goto err4;
+			goto err5;
 		}
 	} else {
 		dev_err(dev, "no device node, failed to add dwc3 core\n");
 		ret = -ENODEV;
-		goto err4;
+		goto err5;
 	}
 
 	return 0;
 
+err5:
+	platform_device_unregister(exynos->usb2_phy);
+	platform_device_unregister(exynos->usb3_phy);
 err4:
 	regulator_disable(exynos->vdd10);
 err3:
diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index 5c0adb9c6fb2..89a2f712fdfe 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -129,12 +129,18 @@ static int st_dwc3_drd_init(struct st_dwc3 *dwc3_data)
 	switch (dwc3_data->dr_mode) {
 	case USB_DR_MODE_PERIPHERAL:
 
-		val &= ~(USB3_FORCE_VBUSVALID | USB3_DELAY_VBUSVALID
+		val &= ~(USB3_DELAY_VBUSVALID
 			| USB3_SEL_FORCE_OPMODE | USB3_FORCE_OPMODE(0x3)
 			| USB3_SEL_FORCE_DPPULLDOWN2 | USB3_FORCE_DPPULLDOWN2
 			| USB3_SEL_FORCE_DMPULLDOWN2 | USB3_FORCE_DMPULLDOWN2);
 
-		val |= USB3_DEVICE_NOT_HOST;
+		/*
+		 * USB3_PORT2_FORCE_VBUSVALID When '1' and when
+		 * USB3_PORT2_DEVICE_NOT_HOST = 1, forces VBUSVLDEXT2 input
+		 * of the pico PHY to 1.
+		 */
+
+		val |= USB3_DEVICE_NOT_HOST | USB3_FORCE_VBUSVALID;
 		break;
 
 	case USB_DR_MODE_HOST:
@@ -227,7 +233,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n",
 		 dwc3_data->glue_base, dwc3_data->syscfg_reg_off);
 
-	dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown");
+	dwc3_data->rstc_pwrdn =
+		devm_reset_control_get_exclusive(dev, "powerdown");
 	if (IS_ERR(dwc3_data->rstc_pwrdn)) {
 		dev_err(&pdev->dev, "could not get power controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_pwrdn);
@@ -237,7 +244,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	/* Manage PowerDown */
 	reset_control_deassert(dwc3_data->rstc_pwrdn);
 
-	dwc3_data->rstc_rst = devm_reset_control_get(dev, "softreset");
+	dwc3_data->rstc_rst =
+		devm_reset_control_get_shared(dev, "softreset");
 	if (IS_ERR(dwc3_data->rstc_rst)) {
 		dev_err(&pdev->dev, "could not get reset controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_rst);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 9a7d0bd15dc3..07248ff1be5c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -347,6 +347,28 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep,
 	return ret;
 }
 
+static int dwc3_send_clear_stall_ep_cmd(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+	struct dwc3_gadget_ep_cmd_params params;
+	u32 cmd = DWC3_DEPCMD_CLEARSTALL;
+
+	/*
+	 * As of core revision 2.60a the recommended programming model
+	 * is to set the ClearPendIN bit when issuing a Clear Stall EP
+	 * command for IN endpoints. This is to prevent an issue where
+	 * some (non-compliant) hosts may not send ACK TPs for pending
+	 * IN transfers due to a mishandled error condition. Synopsys
+	 * STAR 9000614252.
+	 */
+	if (dep->direction && (dwc->revision >= DWC3_REVISION_260A))
+		cmd |= DWC3_DEPCMD_CLEARPENDIN;
+
+	memset(&params, 0, sizeof(params));
+
+	return dwc3_send_gadget_ep_cmd(dwc, dep->number, cmd, &params);
+}
+
 static dma_addr_t dwc3_trb_dma_offset(struct dwc3_ep *dep,
 		struct dwc3_trb *trb)
 {
@@ -1314,8 +1336,7 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 		else
 			dep->flags |= DWC3_EP_STALL;
 	} else {
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-			DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		if (ret)
 			dev_err(dwc->dev, "failed to clear STALL on %s\n",
 					dep->name);
@@ -2247,7 +2268,6 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 	for (epnum = 1; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
 		struct dwc3_ep *dep;
-		struct dwc3_gadget_ep_cmd_params params;
 		int ret;
 
 		dep = dwc->eps[epnum];
@@ -2259,9 +2279,7 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 		dep->flags &= ~DWC3_EP_STALL;
 
-		memset(&params, 0, sizeof(params));
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-				DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		WARN_ON_ONCE(ret);
 	}
 }
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index d67de0d22a2b..eb648485a58c 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1868,14 +1868,19 @@ unknown:
 				}
 				break;
 			}
-			req->length = value;
-			req->context = cdev;
-			req->zero = value < w_length;
-			value = composite_ep0_queue(cdev, req, GFP_ATOMIC);
-			if (value < 0) {
-				DBG(cdev, "ep_queue --> %d\n", value);
-				req->status = 0;
-				composite_setup_complete(gadget->ep0, req);
+
+			if (value >= 0) {
+				req->length = value;
+				req->context = cdev;
+				req->zero = value < w_length;
+				value = composite_ep0_queue(cdev, req,
+							    GFP_ATOMIC);
+				if (value < 0) {
+					DBG(cdev, "ep_queue --> %d\n", value);
+					req->status = 0;
+					composite_setup_complete(gadget->ep0,
+								 req);
+				}
 			}
 			return value;
 		}
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index b6f60ca8a035..70cf3477f951 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1401,6 +1401,7 @@ static const struct usb_gadget_driver configfs_driver_template = {
 		.owner          = THIS_MODULE,
 		.name		= "configfs-gadget",
 	},
+	.match_existing_only = 1,
 };
 
 static struct config_group *gadgets_make(
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 73515d54e1cc..cc33d2667408 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2051,7 +2051,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 
 		if (len < sizeof(*d) ||
 		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-		    d->Reserved1)
+		    !d->Reserved1)
 			return -EINVAL;
 		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
 			if (d->Reserved2[i])
@@ -2729,6 +2729,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		func->ffs->ss_descs_count;
 
 	int fs_len, hs_len, ss_len, ret, i;
+	struct ffs_ep *eps_ptr;
 
 	/* Make it a single chunk, less management later on */
 	vla_group(d);
@@ -2777,12 +2778,9 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	       ffs->raw_descs_length);
 
 	memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz);
-	for (ret = ffs->eps_count; ret; --ret) {
-		struct ffs_ep *ptr;
-
-		ptr = vla_ptr(vlabuf, d, eps);
-		ptr[ret].num = -1;
-	}
+	eps_ptr = vla_ptr(vlabuf, d, eps);
+	for (i = 0; i < ffs->eps_count; i++)
+		eps_ptr[i].num = -1;
 
 	/* Save pointers
 	 * d_eps == vlabuf, func->eps used to kfree vlabuf later
@@ -2851,7 +2849,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		goto error;
 
 	func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table);
-	if (c->cdev->use_os_string)
+	if (c->cdev->use_os_string) {
 		for (i = 0; i < ffs->interfaces_count; ++i) {
 			struct usb_os_desc *desc;
 
@@ -2862,13 +2860,15 @@ static int _ffs_func_bind(struct usb_configuration *c,
 				vla_ptr(vlabuf, d, ext_compat) + i * 16;
 			INIT_LIST_HEAD(&desc->ext_prop);
 		}
-	ret = ffs_do_os_descs(ffs->ms_os_descs_count,
-			      vla_ptr(vlabuf, d, raw_descs) +
-			      fs_len + hs_len + ss_len,
-			      d_raw_descs__sz - fs_len - hs_len - ss_len,
-			      __ffs_func_bind_do_os_desc, func);
-	if (unlikely(ret < 0))
-		goto error;
+		ret = ffs_do_os_descs(ffs->ms_os_descs_count,
+				      vla_ptr(vlabuf, d, raw_descs) +
+				      fs_len + hs_len + ss_len,
+				      d_raw_descs__sz - fs_len - hs_len -
+				      ss_len,
+				      __ffs_func_bind_do_os_desc, func);
+		if (unlikely(ret < 0))
+			goto error;
+	}
 	func->function.os_desc_n =
 		c->cdev->use_os_string ? ffs->interfaces_count : 0;
 
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index c45104e3a64b..64706a789580 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -161,14 +161,6 @@ static struct usb_endpoint_descriptor hs_ep_out_desc = {
 	.wMaxPacketSize =	cpu_to_le16(512)
 };
 
-static struct usb_qualifier_descriptor dev_qualifier = {
-	.bLength =		sizeof(dev_qualifier),
-	.bDescriptorType =	USB_DT_DEVICE_QUALIFIER,
-	.bcdUSB =		cpu_to_le16(0x0200),
-	.bDeviceClass =		USB_CLASS_PRINTER,
-	.bNumConfigurations =	1
-};
-
 static struct usb_descriptor_header *hs_printer_function[] = {
 	(struct usb_descriptor_header *) &intf_desc,
 	(struct usb_descriptor_header *) &hs_ep_in_desc,
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 35fe3c80cfc0..197f73386fac 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1445,16 +1445,18 @@ static void usbg_drop_tpg(struct se_portal_group *se_tpg)
 	for (i = 0; i < TPG_INSTANCES; ++i)
 		if (tpg_instances[i].tpg == tpg)
 			break;
-	if (i < TPG_INSTANCES)
+	if (i < TPG_INSTANCES) {
 		tpg_instances[i].tpg = NULL;
-	opts = container_of(tpg_instances[i].func_inst,
-		struct f_tcm_opts, func_inst);
-	mutex_lock(&opts->dep_lock);
-	if (opts->has_dep)
-		module_put(opts->dependent);
-	else
-		configfs_undepend_item_unlocked(&opts->func_inst.group.cg_item);
-	mutex_unlock(&opts->dep_lock);
+		opts = container_of(tpg_instances[i].func_inst,
+			struct f_tcm_opts, func_inst);
+		mutex_lock(&opts->dep_lock);
+		if (opts->has_dep)
+			module_put(opts->dependent);
+		else
+			configfs_undepend_item_unlocked(
+				&opts->func_inst.group.cg_item);
+		mutex_unlock(&opts->dep_lock);
+	}
 	mutex_unlock(&tpg_instances_lock);
 
 	kfree(tpg);
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 186d4b162524..cd214ec8a601 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -598,18 +598,6 @@ static struct usb_gadget_strings *fn_strings[] = {
 	NULL,
 };
 
-static struct usb_qualifier_descriptor devqual_desc = {
-	.bLength = sizeof devqual_desc,
-	.bDescriptorType = USB_DT_DEVICE_QUALIFIER,
-
-	.bcdUSB = cpu_to_le16(0x200),
-	.bDeviceClass = USB_CLASS_MISC,
-	.bDeviceSubClass = 0x02,
-	.bDeviceProtocol = 0x01,
-	.bNumConfigurations = 1,
-	.bRESERVED = 0,
-};
-
 static struct usb_interface_assoc_descriptor iad_desc = {
 	.bLength = sizeof iad_desc,
 	.bDescriptorType = USB_DT_INTERFACE_ASSOCIATION,
@@ -1292,6 +1280,7 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr)
 
 	if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) {
 		struct cntrl_cur_lay3 c;
+		memset(&c, 0, sizeof(struct cntrl_cur_lay3));
 
 		if (entity_id == USB_IN_CLK_ID)
 			c.dCUR = p_srate;
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index d62683017cf3..990df221c629 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -83,9 +83,7 @@ EXPORT_SYMBOL_GPL(fsg_fs_function);
  * USB 2.0 devices need to expose both high speed and full speed
  * descriptors, unless they only run at full speed.
  *
- * That means alternate endpoint descriptors (bigger packets)
- * and a "device qualifier" ... plus more construction options
- * for the configuration descriptor.
+ * That means alternate endpoint descriptors (bigger packets).
  */
 struct usb_endpoint_descriptor fsg_hs_bulk_in_desc = {
 	.bLength =		USB_DT_ENDPOINT_SIZE,
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index e64479f882a5..aa3707bdebb4 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -938,8 +938,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 			struct usb_ep		*ep = dev->gadget->ep0;
 			struct usb_request	*req = dev->req;
 
-			if ((retval = setup_req (ep, req, 0)) == 0)
-				retval = usb_ep_queue (ep, req, GFP_ATOMIC);
+			if ((retval = setup_req (ep, req, 0)) == 0) {
+				spin_unlock_irq (&dev->lock);
+				retval = usb_ep_queue (ep, req, GFP_KERNEL);
+				spin_lock_irq (&dev->lock);
+			}
 			dev->state = STATE_DEV_CONNECTED;
 
 			/* assume that was SET_CONFIGURATION */
@@ -1457,8 +1460,11 @@ delegate:
 							w_length);
 				if (value < 0)
 					break;
+
+				spin_unlock (&dev->lock);
 				value = usb_ep_queue (gadget->ep0, dev->req,
-							GFP_ATOMIC);
+							GFP_KERNEL);
+				spin_lock (&dev->lock);
 				if (value < 0) {
 					clean_req (gadget->ep0, dev->req);
 					break;
@@ -1481,11 +1487,14 @@ delegate:
 	if (value >= 0 && dev->state != STATE_DEV_SETUP) {
 		req->length = value;
 		req->zero = value < w_length;
-		value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC);
+
+		spin_unlock (&dev->lock);
+		value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL);
 		if (value < 0) {
 			DBG (dev, "ep_queue --> %d\n", value);
 			req->status = 0;
 		}
+		return value;
 	}
 
 	/* device stalls when value < 0 */
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index 6e8300d6a737..e1b2dcebdc2e 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -603,11 +603,15 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
 		}
 	}
 
-	list_add_tail(&driver->pending, &gadget_driver_pending_list);
-	pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
-		driver->function);
+	if (!driver->match_existing_only) {
+		list_add_tail(&driver->pending, &gadget_driver_pending_list);
+		pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
+			driver->function);
+		ret = 0;
+	}
+
 	mutex_unlock(&udc_lock);
-	return 0;
+	return ret;
 found:
 	ret = udc_bind_to_driver(udc, driver);
 	mutex_unlock(&udc_lock);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index ae1b6e69eb96..a962b89b65a6 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -368,6 +368,15 @@ static void ehci_shutdown(struct usb_hcd *hcd)
 {
 	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
 
+	/**
+	 * Protect the system from crashing at system shutdown in cases where
+	 * usb host is not added yet from OTG controller driver.
+	 * As ehci_setup() not done yet, so stop accessing registers or
+	 * variables initialized in ehci_setup()
+	 */
+	if (!ehci->sbrn)
+		return;
+
 	spin_lock_irq(&ehci->lock);
 	ehci->shutdown = true;
 	ehci->rh_state = EHCI_RH_STOPPING;
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index ffc90295a95f..74f62d68f013 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -872,15 +872,23 @@ int ehci_hub_control(
 ) {
 	struct ehci_hcd	*ehci = hcd_to_ehci (hcd);
 	int		ports = HCS_N_PORTS (ehci->hcs_params);
-	u32 __iomem	*status_reg = &ehci->regs->port_status[
-				(wIndex & 0xff) - 1];
-	u32 __iomem	*hostpc_reg = &ehci->regs->hostpc[(wIndex & 0xff) - 1];
+	u32 __iomem	*status_reg, *hostpc_reg;
 	u32		temp, temp1, status;
 	unsigned long	flags;
 	int		retval = 0;
 	unsigned	selector;
 
 	/*
+	 * Avoid underflow while calculating (wIndex & 0xff) - 1.
+	 * The compiler might deduce that wIndex can never be 0 and then
+	 * optimize away the tests for !wIndex below.
+	 */
+	temp = wIndex & 0xff;
+	temp -= (temp > 0);
+	status_reg = &ehci->regs->port_status[temp];
+	hostpc_reg = &ehci->regs->hostpc[temp];
+
+	/*
 	 * FIXME:  support SetPortFeatures USB_PORT_FEAT_INDICATOR.
 	 * HCS_INDICATOR may say we can change LEDs to off/amber/green.
 	 * (track current state ourselves) ... blink for diagnostics,
diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c
index d3afc89d00f5..2f8d3af811ce 100644
--- a/drivers/usb/host/ehci-msm.c
+++ b/drivers/usb/host/ehci-msm.c
@@ -179,22 +179,32 @@ static int ehci_msm_remove(struct platform_device *pdev)
 static int ehci_msm_pm_suspend(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 	bool do_wakeup = device_may_wakeup(dev);
 
 	dev_dbg(dev, "ehci-msm PM suspend\n");
 
-	return ehci_suspend(hcd, do_wakeup);
+	/* Only call ehci_suspend if ehci_setup has been done */
+	if (ehci->sbrn)
+		return ehci_suspend(hcd, do_wakeup);
+
+	return 0;
 }
 
 static int ehci_msm_pm_resume(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
 	dev_dbg(dev, "ehci-msm PM resume\n");
-	ehci_resume(hcd, false);
+
+	/* Only call ehci_resume if ehci_setup has been done */
+	if (ehci->sbrn)
+		ehci_resume(hcd, false);
 
 	return 0;
 }
+
 #else
 #define ehci_msm_pm_suspend	NULL
 #define ehci_msm_pm_resume	NULL
diff --git a/drivers/usb/host/ehci-st.c b/drivers/usb/host/ehci-st.c
index a94ed677d937..be4a2788fc58 100644
--- a/drivers/usb/host/ehci-st.c
+++ b/drivers/usb/host/ehci-st.c
@@ -206,7 +206,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		if (err == -EPROBE_DEFER)
@@ -214,7 +215,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->pwr = NULL;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		if (err == -EPROBE_DEFER)
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 4031b372008e..9a3d7db5be57 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -81,15 +81,23 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct tegra_ehci_hcd *tegra =
 		(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
+	bool has_utmi_pad_registers = false;
 
 	phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0);
 	if (!phy_np)
 		return -ENOENT;
 
+	if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers"))
+		has_utmi_pad_registers = true;
+
 	if (!usb1_reset_attempted) {
 		struct reset_control *usb1_reset;
 
-		usb1_reset = of_reset_control_get(phy_np, "usb");
+		if (!has_utmi_pad_registers)
+			usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
+		else
+			usb1_reset = tegra->rst;
+
 		if (IS_ERR(usb1_reset)) {
 			dev_warn(&pdev->dev,
 				 "can't get utmi-pads reset from the PHY\n");
@@ -99,13 +107,15 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 			reset_control_assert(usb1_reset);
 			udelay(1);
 			reset_control_deassert(usb1_reset);
+
+			if (!has_utmi_pad_registers)
+				reset_control_put(usb1_reset);
 		}
 
-		reset_control_put(usb1_reset);
 		usb1_reset_attempted = true;
 	}
 
-	if (!of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers")) {
+	if (!has_utmi_pad_registers) {
 		reset_control_assert(tegra->rst);
 		udelay(1);
 		reset_control_deassert(tegra->rst);
diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index d029bbe9eb36..641fed609911 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -183,7 +183,6 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 {
 	int	branch;
 
-	ed->state = ED_OPER;
 	ed->ed_prev = NULL;
 	ed->ed_next = NULL;
 	ed->hwNextED = 0;
@@ -259,6 +258,8 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 	/* the HC may not see the schedule updates yet, but if it does
 	 * then they'll be properly ordered.
 	 */
+
+	ed->state = ED_OPER;
 	return 0;
 }
 
diff --git a/drivers/usb/host/ohci-st.c b/drivers/usb/host/ohci-st.c
index acf2eb2a5676..02816a1515a1 100644
--- a/drivers/usb/host/ohci-st.c
+++ b/drivers/usb/host/ohci-st.c
@@ -188,13 +188,15 @@ static int st_ohci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		goto err_put_clks;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		goto err_put_clks;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 48672fac7ff3..c10972fcc8e4 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -37,6 +37,7 @@
 /* Device for a quirk */
 #define PCI_VENDOR_ID_FRESCO_LOGIC	0x1b73
 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK	0x1000
+#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009	0x1009
 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400	0x1400
 
 #define PCI_VENDOR_ID_ETRON		0x1b6f
@@ -114,6 +115,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 	}
 
+	if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+			pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
+		xhci->quirks |= XHCI_BROKEN_STREAMS;
+
 	if (pdev->vendor == PCI_VENDOR_ID_NEC)
 		xhci->quirks |= XHCI_NEC_HOST;
 
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 676ea458148b..1f3f981fe7f8 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -196,6 +196,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
 		ret = clk_prepare_enable(clk);
 		if (ret)
 			goto put_hcd;
+	} else if (PTR_ERR(clk) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto put_hcd;
 	}
 
 	xhci = hcd_to_xhci(hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52deae4b7eac..d7d502578d79 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -290,6 +290,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+
+	/*
+	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+	 * but the completion event in never sent. Use the cmd timeout timer to
+	 * handle those cases. Use twice the time to cover the bit polling retry
+	 */
+	mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
 	xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
 			&xhci->op_regs->cmd_ring);
 
@@ -314,6 +322,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 		xhci_err(xhci, "Stopped the command ring failed, "
 				"maybe the host is dead\n");
+		del_timer(&xhci->cmd_timer);
 		xhci->xhc_state |= XHCI_STATE_DYING;
 		xhci_quiesce(xhci);
 		xhci_halt(xhci);
@@ -1246,22 +1255,21 @@ void xhci_handle_command_timeout(unsigned long data)
 	int ret;
 	unsigned long flags;
 	u64 hw_ring_state;
-	struct xhci_command *cur_cmd = NULL;
+	bool second_timeout = false;
 	xhci = (struct xhci_hcd *) data;
 
 	/* mark this command to be cancelled */
 	spin_lock_irqsave(&xhci->lock, flags);
 	if (xhci->current_cmd) {
-		cur_cmd = xhci->current_cmd;
-		cur_cmd->status = COMP_CMD_ABORT;
+		if (xhci->current_cmd->status == COMP_CMD_ABORT)
+			second_timeout = true;
+		xhci->current_cmd->status = COMP_CMD_ABORT;
 	}
 
-
 	/* Make sure command ring is running before aborting it */
 	hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
 	    (hw_ring_state & CMD_RING_RUNNING))  {
-
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		xhci_dbg(xhci, "Command timeout\n");
 		ret = xhci_abort_cmd_ring(xhci);
@@ -1273,6 +1281,15 @@ void xhci_handle_command_timeout(unsigned long data)
 		}
 		return;
 	}
+
+	/* command ring failed to restart, or host removed. Bail out */
+	if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+		xhci_cleanup_command_queue(xhci);
+		return;
+	}
+
 	/* command timeout on stopped ring, ring can't be aborted */
 	xhci_dbg(xhci, "Command timeout on stopped ring\n");
 	xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);
@@ -2721,7 +2738,8 @@ hw_died:
 		writel(irq_pending, &xhci->ir_set->irq_pending);
 	}
 
-	if (xhci->xhc_state & XHCI_STATE_DYING) {
+	if (xhci->xhc_state & XHCI_STATE_DYING ||
+	    xhci->xhc_state & XHCI_STATE_HALTED) {
 		xhci_dbg(xhci, "xHCI dying, ignoring interrupt. "
 				"Shouldn't IRQs be disabled?\n");
 		/* Clear the event handler busy flag (RW1C);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index fa7e1ef36cd9..f2f9518c53ab 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -685,20 +685,23 @@ void xhci_stop(struct usb_hcd *hcd)
 	u32 temp;
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
-	if (xhci->xhc_state & XHCI_STATE_HALTED)
-		return;
-
 	mutex_lock(&xhci->mutex);
-	spin_lock_irq(&xhci->lock);
-	xhci->xhc_state |= XHCI_STATE_HALTED;
-	xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
 
-	/* Make sure the xHC is halted for a USB3 roothub
-	 * (xhci_stop() could be called as part of failed init).
-	 */
-	xhci_halt(xhci);
-	xhci_reset(xhci);
-	spin_unlock_irq(&xhci->lock);
+	if (!(xhci->xhc_state & XHCI_STATE_HALTED)) {
+		spin_lock_irq(&xhci->lock);
+
+		xhci->xhc_state |= XHCI_STATE_HALTED;
+		xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+		xhci_halt(xhci);
+		xhci_reset(xhci);
+
+		spin_unlock_irq(&xhci->lock);
+	}
+
+	if (!usb_hcd_is_primary_hcd(hcd)) {
+		mutex_unlock(&xhci->mutex);
+		return;
+	}
 
 	xhci_cleanup_msix(xhci);
 
@@ -4886,7 +4889,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 		xhci->hcc_params2 = readl(&xhci->cap_regs->hcc_params2);
 	xhci_print_registers(xhci);
 
-	xhci->quirks = quirks;
+	xhci->quirks |= quirks;
 
 	get_quirks(dev, xhci);
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 39fd95833eb8..f824336def5c 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1090,29 +1090,6 @@ void musb_stop(struct musb *musb)
 	musb_platform_try_idle(musb, 0);
 }
 
-static void musb_shutdown(struct platform_device *pdev)
-{
-	struct musb	*musb = dev_to_musb(&pdev->dev);
-	unsigned long	flags;
-
-	pm_runtime_get_sync(musb->controller);
-
-	musb_host_cleanup(musb);
-	musb_gadget_cleanup(musb);
-
-	spin_lock_irqsave(&musb->lock, flags);
-	musb_platform_disable(musb);
-	musb_generic_disable(musb);
-	spin_unlock_irqrestore(&musb->lock, flags);
-
-	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
-	musb_platform_exit(musb);
-
-	pm_runtime_put(musb->controller);
-	/* FIXME power down */
-}
-
-
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -1702,7 +1679,7 @@ EXPORT_SYMBOL_GPL(musb_dma_completion);
 #define use_dma			0
 #endif
 
-static void (*musb_phy_callback)(enum musb_vbus_id_status status);
+static int (*musb_phy_callback)(enum musb_vbus_id_status status);
 
 /*
  * musb_mailbox - optional phy notifier function
@@ -1711,11 +1688,12 @@ static void (*musb_phy_callback)(enum musb_vbus_id_status status);
  * Optionally gets called from the USB PHY. Note that the USB PHY must be
  * disabled at the point the phy_callback is registered or unregistered.
  */
-void musb_mailbox(enum musb_vbus_id_status status)
+int musb_mailbox(enum musb_vbus_id_status status)
 {
 	if (musb_phy_callback)
-		musb_phy_callback(status);
+		return musb_phy_callback(status);
 
+	return -ENODEV;
 };
 EXPORT_SYMBOL_GPL(musb_mailbox);
 
@@ -2028,11 +2006,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	musb_readl = musb_default_readl;
 	musb_writel = musb_default_writel;
 
-	/* We need musb_read/write functions initialized for PM */
-	pm_runtime_use_autosuspend(musb->controller);
-	pm_runtime_set_autosuspend_delay(musb->controller, 200);
-	pm_runtime_enable(musb->controller);
-
 	/* The musb_platform_init() call:
 	 *   - adjusts musb->mregs
 	 *   - sets the musb->isr
@@ -2134,6 +2107,16 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (musb->ops->phy_callback)
 		musb_phy_callback = musb->ops->phy_callback;
 
+	/*
+	 * We need musb_read/write functions initialized for PM.
+	 * Note that at least 2430 glue needs autosuspend delay
+	 * somewhere above 300 ms for the hardware to idle properly
+	 * after disconnecting the cable in host mode. Let's use
+	 * 500 ms for some margin.
+	 */
+	pm_runtime_use_autosuspend(musb->controller);
+	pm_runtime_set_autosuspend_delay(musb->controller, 500);
+	pm_runtime_enable(musb->controller);
 	pm_runtime_get_sync(musb->controller);
 
 	status = usb_phy_init(musb->xceiv);
@@ -2237,13 +2220,8 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (status)
 		goto fail5;
 
-	pm_runtime_put(musb->controller);
-
-	/*
-	 * For why this is currently needed, see commit 3e43a0725637
-	 * ("usb: musb: core: add pm_runtime_irq_safe()")
-	 */
-	pm_runtime_irq_safe(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -2265,7 +2243,9 @@ fail2_5:
 	usb_phy_shutdown(musb->xceiv);
 
 err_usb_phy_init:
+	pm_runtime_dont_use_autosuspend(musb->controller);
 	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
 
 fail2:
 	if (musb->irq_wake)
@@ -2273,7 +2253,6 @@ fail2:
 	musb_platform_exit(musb);
 
 fail1:
-	pm_runtime_disable(musb->controller);
 	dev_err(musb->controller,
 		"musb_init_controller failed with status %d\n", status);
 
@@ -2312,6 +2291,7 @@ static int musb_remove(struct platform_device *pdev)
 {
 	struct device	*dev = &pdev->dev;
 	struct musb	*musb = dev_to_musb(dev);
+	unsigned long	flags;
 
 	/* this gets called on rmmod.
 	 *  - Host mode: host may still be active
@@ -2319,17 +2299,26 @@ static int musb_remove(struct platform_device *pdev)
 	 *  - OTG mode: both roles are deactivated (or never-activated)
 	 */
 	musb_exit_debugfs(musb);
-	musb_shutdown(pdev);
-	musb_phy_callback = NULL;
-
-	if (musb->dma_controller)
-		musb_dma_controller_destroy(musb->dma_controller);
-
-	usb_phy_shutdown(musb->xceiv);
 
 	cancel_work_sync(&musb->irq_work);
 	cancel_delayed_work_sync(&musb->finish_resume_work);
 	cancel_delayed_work_sync(&musb->deassert_reset_work);
+	pm_runtime_get_sync(musb->controller);
+	musb_host_cleanup(musb);
+	musb_gadget_cleanup(musb);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_platform_disable(musb);
+	musb_generic_disable(musb);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
+	pm_runtime_dont_use_autosuspend(musb->controller);
+	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
+	musb_platform_exit(musb);
+	musb_phy_callback = NULL;
+	if (musb->dma_controller)
+		musb_dma_controller_destroy(musb->dma_controller);
+	usb_phy_shutdown(musb->xceiv);
 	musb_free(musb);
 	device_init_wakeup(dev, 0);
 	return 0;
@@ -2429,7 +2418,8 @@ static void musb_restore_context(struct musb *musb)
 	musb_writew(musb_base, MUSB_INTRTXE, musb->intrtxe);
 	musb_writew(musb_base, MUSB_INTRRXE, musb->intrrxe);
 	musb_writeb(musb_base, MUSB_INTRUSBE, musb->context.intrusbe);
-	musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
+	if (musb->context.devctl & MUSB_DEVCTL_SESSION)
+		musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
 
 	for (i = 0; i < musb->config->num_eps; ++i) {
 		struct musb_hw_ep	*hw_ep;
@@ -2612,7 +2602,6 @@ static struct platform_driver musb_driver = {
 	},
 	.probe		= musb_probe,
 	.remove		= musb_remove,
-	.shutdown	= musb_shutdown,
 };
 
 module_platform_driver(musb_driver);
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index b6afe9e43305..b55a776b03eb 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -215,7 +215,7 @@ struct musb_platform_ops {
 				dma_addr_t *dma_addr, u32 *len);
 	void	(*pre_root_reset_end)(struct musb *musb);
 	void	(*post_root_reset_end)(struct musb *musb);
-	void	(*phy_callback)(enum musb_vbus_id_status status);
+	int	(*phy_callback)(enum musb_vbus_id_status status);
 };
 
 /*
@@ -312,6 +312,7 @@ struct musb {
 	struct work_struct	irq_work;
 	struct delayed_work	deassert_reset_work;
 	struct delayed_work	finish_resume_work;
+	struct delayed_work	gadget_work;
 	u16			hwvers;
 
 	u16			intrrxe;
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 152865b36522..af2a3a7addf9 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1656,6 +1656,20 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 	return usb_phy_set_power(musb->xceiv, mA);
 }
 
+static void musb_gadget_work(struct work_struct *work)
+{
+	struct musb *musb;
+	unsigned long flags;
+
+	musb = container_of(work, struct musb, gadget_work.work);
+	pm_runtime_get_sync(musb->controller);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_pullup(musb, musb->softconnect);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
+}
+
 static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 {
 	struct musb	*musb = gadget_to_musb(gadget);
@@ -1663,20 +1677,16 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 
 	is_on = !!is_on;
 
-	pm_runtime_get_sync(musb->controller);
-
 	/* NOTE: this assumes we are sensing vbus; we'd rather
 	 * not pullup unless the B-session is active.
 	 */
 	spin_lock_irqsave(&musb->lock, flags);
 	if (is_on != musb->softconnect) {
 		musb->softconnect = is_on;
-		musb_pullup(musb, is_on);
+		schedule_delayed_work(&musb->gadget_work, 0);
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
 
-	pm_runtime_put(musb->controller);
-
 	return 0;
 }
 
@@ -1845,7 +1855,7 @@ int musb_gadget_setup(struct musb *musb)
 #elif IS_ENABLED(CONFIG_USB_MUSB_GADGET)
 	musb->g.is_otg = 0;
 #endif
-
+	INIT_DELAYED_WORK(&musb->gadget_work, musb_gadget_work);
 	musb_g_init_endpoints(musb);
 
 	musb->is_active = 0;
@@ -1866,6 +1876,8 @@ void musb_gadget_cleanup(struct musb *musb)
 {
 	if (musb->port_mode == MUSB_PORT_MODE_HOST)
 		return;
+
+	cancel_delayed_work_sync(&musb->gadget_work);
 	usb_del_gadget_udc(&musb->g);
 }
 
@@ -1914,8 +1926,8 @@ static int musb_gadget_start(struct usb_gadget *g,
 	if (musb->xceiv->last_event == USB_EVENT_ID)
 		musb_platform_set_vbus(musb, 1);
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -1934,8 +1946,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	struct musb	*musb = gadget_to_musb(g);
 	unsigned long	flags;
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_get_sync(musb->controller);
+	pm_runtime_get_sync(musb->controller);
 
 	/*
 	 * REVISIT always use otg_set_peripheral() here too;
@@ -1963,7 +1974,8 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	 * that currently misbehaves.
 	 */
 
-	pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 }
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 2f8ad7f1f482..d227a71d85e1 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -434,7 +434,13 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb,
 		}
 	}
 
-	if (qh != NULL && qh->is_ready) {
+	/*
+	 * The pipe must be broken if current urb->status is set, so don't
+	 * start next urb.
+	 * TODO: to minimize the risk of regression, only check urb->status
+	 * for RX, until we have a test case to understand the behavior of TX.
+	 */
+	if ((!status || !is_in) && qh && qh->is_ready) {
 		dev_dbg(musb->controller, "... next ep%d %cX urb %p\n",
 		    hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
 		musb_start_urb(musb, is_in, qh);
@@ -594,14 +600,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 		musb_writew(ep->regs, MUSB_TXCSR, 0);
 
 	/* scrub all previous state, clearing toggle */
-	} else {
-		csr = musb_readw(ep->regs, MUSB_RXCSR);
-		if (csr & MUSB_RXCSR_RXPKTRDY)
-			WARNING("rx%d, packet/%d ready?\n", ep->epnum,
-				musb_readw(ep->regs, MUSB_RXCOUNT));
-
-		musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 	}
+	csr = musb_readw(ep->regs, MUSB_RXCSR);
+	if (csr & MUSB_RXCSR_RXPKTRDY)
+		WARNING("rx%d, packet/%d ready?\n", ep->epnum,
+			musb_readw(ep->regs, MUSB_RXCOUNT));
+
+	musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 
 	/* target addr and (for multipoint) hub addr/port */
 	if (musb->is_multipoint) {
@@ -627,7 +632,7 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 	ep->rx_reinit = 0;
 }
 
-static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
+static void musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 		struct musb_hw_ep *hw_ep, struct musb_qh *qh,
 		struct urb *urb, u32 offset,
 		u32 *length, u8 *mode)
@@ -664,23 +669,18 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 	}
 	channel->desired_mode = *mode;
 	musb_writew(epio, MUSB_TXCSR, csr);
-
-	return 0;
 }
 
-static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
-					  struct musb_hw_ep *hw_ep,
-					  struct musb_qh *qh,
-					  struct urb *urb,
-					  u32 offset,
-					  u32 *length,
-					  u8 *mode)
+static void musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
+					   struct musb_hw_ep *hw_ep,
+					   struct musb_qh *qh,
+					   struct urb *urb,
+					   u32 offset,
+					   u32 *length,
+					   u8 *mode)
 {
 	struct dma_channel *channel = hw_ep->tx_channel;
 
-	if (!is_cppi_enabled(hw_ep->musb) && !tusb_dma_omap(hw_ep->musb))
-		return -ENODEV;
-
 	channel->actual_len = 0;
 
 	/*
@@ -688,8 +688,6 @@ static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
 	 * to identify the zero-length-final-packet case.
 	 */
 	*mode = (urb->transfer_flags & URB_ZERO_PACKET) ? 1 : 0;
-
-	return 0;
 }
 
 static bool musb_tx_dma_program(struct dma_controller *dma,
@@ -699,15 +697,14 @@ static bool musb_tx_dma_program(struct dma_controller *dma,
 	struct dma_channel	*channel = hw_ep->tx_channel;
 	u16			pkt_size = qh->maxpacket;
 	u8			mode;
-	int			res;
 
 	if (musb_dma_inventra(hw_ep->musb) || musb_dma_ux500(hw_ep->musb))
-		res = musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb,
-						 offset, &length, &mode);
+		musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, offset,
+					    &length, &mode);
+	else if (is_cppi_enabled(hw_ep->musb) || tusb_dma_omap(hw_ep->musb))
+		musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, offset,
+					       &length, &mode);
 	else
-		res = musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb,
-						     offset, &length, &mode);
-	if (res)
 		return false;
 
 	qh->segsize = length;
@@ -995,9 +992,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
 	if (is_in) {
 		dma = is_dma_capable() ? ep->rx_channel : NULL;
 
-		/* clear nak timeout bit */
+		/*
+		 * Need to stop the transaction by clearing REQPKT first
+		 * then the NAK Timeout bit ref MUSBMHDRC USB 2.0 HIGH-SPEED
+		 * DUAL-ROLE CONTROLLER Programmer's Guide, section 9.2.2
+		 */
 		rx_csr = musb_readw(epio, MUSB_RXCSR);
 		rx_csr |= MUSB_RXCSR_H_WZC_BITS;
+		rx_csr &= ~MUSB_RXCSR_H_REQPKT;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
 		rx_csr &= ~MUSB_RXCSR_DATAERROR;
 		musb_writew(epio, MUSB_RXCSR, rx_csr);
 
@@ -1551,7 +1554,7 @@ static int musb_rx_dma_iso_cppi41(struct dma_controller *dma,
 				  struct urb *urb,
 				  size_t len)
 {
-	struct dma_channel *channel = hw_ep->tx_channel;
+	struct dma_channel *channel = hw_ep->rx_channel;
 	void __iomem *epio = hw_ep->regs;
 	dma_addr_t *buf;
 	u32 length, res;
@@ -1870,6 +1873,9 @@ void musb_host_rx(struct musb *musb, u8 epnum)
 		status = -EPROTO;
 		musb_writeb(epio, MUSB_RXINTERVAL, 0);
 
+		rx_csr &= ~MUSB_RXCSR_H_ERROR;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
+
 	} else if (rx_csr & MUSB_RXCSR_DATAERROR) {
 
 		if (USB_ENDPOINT_XFER_ISOC != qh->type) {
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index c84e0322c108..0b4cec940386 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -49,97 +49,14 @@ struct omap2430_glue {
 	enum musb_vbus_id_status status;
 	struct work_struct	omap_musb_mailbox_work;
 	struct device		*control_otghs;
+	bool			cable_connected;
+	bool			enabled;
+	bool			powered;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
 static struct omap2430_glue	*_glue;
 
-static struct timer_list musb_idle_timer;
-
-static void musb_do_idle(unsigned long _musb)
-{
-	struct musb	*musb = (void *)_musb;
-	unsigned long	flags;
-	u8	power;
-	u8	devctl;
-
-	spin_lock_irqsave(&musb->lock, flags);
-
-	switch (musb->xceiv->otg->state) {
-	case OTG_STATE_A_WAIT_BCON:
-
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE) {
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			MUSB_DEV_MODE(musb);
-		} else {
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-			MUSB_HST_MODE(musb);
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		/* finish RESUME signaling? */
-		if (musb->port1_status & MUSB_PORT_STAT_RESUME) {
-			power = musb_readb(musb->mregs, MUSB_POWER);
-			power &= ~MUSB_POWER_RESUME;
-			dev_dbg(musb->controller, "root port resume stopped, power %02x\n", power);
-			musb_writeb(musb->mregs, MUSB_POWER, power);
-			musb->is_active = 1;
-			musb->port1_status &= ~(USB_PORT_STAT_SUSPEND
-						| MUSB_PORT_STAT_RESUME);
-			musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16;
-			usb_hcd_poll_rh_status(musb->hcd);
-			/* NOTE: it might really be A_WAIT_BCON ... */
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl &  MUSB_DEVCTL_BDEVICE)
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-		else
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
-	default:
-		break;
-	}
-	spin_unlock_irqrestore(&musb->lock, flags);
-}
-
-
-static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout)
-{
-	unsigned long		default_timeout = jiffies + msecs_to_jiffies(3);
-	static unsigned long	last_timer;
-
-	if (timeout == 0)
-		timeout = default_timeout;
-
-	/* Never idle if active, or when VBUS timeout is not set as host */
-	if (musb->is_active || ((musb->a_wait_bcon == 0)
-			&& (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON))) {
-		dev_dbg(musb->controller, "%s active, deleting timer\n",
-			usb_otg_state_string(musb->xceiv->otg->state));
-		del_timer(&musb_idle_timer);
-		last_timer = jiffies;
-		return;
-	}
-
-	if (time_after(last_timer, timeout)) {
-		if (!timer_pending(&musb_idle_timer))
-			last_timer = timeout;
-		else {
-			dev_dbg(musb->controller, "Longer idle timer already pending, ignoring\n");
-			return;
-		}
-	}
-	last_timer = timeout;
-
-	dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n",
-		usb_otg_state_string(musb->xceiv->otg->state),
-		(unsigned long)jiffies_to_msecs(timeout - jiffies));
-	mod_timer(&musb_idle_timer, timeout);
-}
-
 static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct usb_otg	*otg = musb->xceiv->otg;
@@ -205,16 +122,6 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 		musb_readb(musb->mregs, MUSB_DEVCTL));
 }
 
-static int omap2430_musb_set_mode(struct musb *musb, u8 musb_mode)
-{
-	u8	devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-
-	devctl |= MUSB_DEVCTL_SESSION;
-	musb_writeb(musb->mregs, MUSB_DEVCTL, devctl);
-
-	return 0;
-}
-
 static inline void omap2430_low_level_exit(struct musb *musb)
 {
 	u32 l;
@@ -234,22 +141,63 @@ static inline void omap2430_low_level_init(struct musb *musb)
 	musb_writel(musb->mregs, OTG_FORCESTDBY, l);
 }
 
-static void omap2430_musb_mailbox(enum musb_vbus_id_status status)
+/*
+ * We can get multiple cable events so we need to keep track
+ * of the power state. Only keep power enabled if USB cable is
+ * connected and a gadget is started.
+ */
+static void omap2430_set_power(struct musb *musb, bool enabled, bool cable)
+{
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
+	bool power_up;
+	int res;
+
+	if (glue->enabled != enabled)
+		glue->enabled = enabled;
+
+	if (glue->cable_connected != cable)
+		glue->cable_connected = cable;
+
+	power_up = glue->enabled && glue->cable_connected;
+	if (power_up == glue->powered) {
+		dev_warn(musb->controller, "power state already %i\n",
+			 power_up);
+		return;
+	}
+
+	glue->powered = power_up;
+
+	if (power_up) {
+		res = pm_runtime_get_sync(musb->controller);
+		if (res < 0) {
+			dev_err(musb->controller, "could not enable: %i", res);
+			glue->powered = false;
+		}
+	} else {
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
+	}
+}
+
+static int omap2430_musb_mailbox(enum musb_vbus_id_status status)
 {
 	struct omap2430_glue	*glue = _glue;
 
 	if (!glue) {
 		pr_err("%s: musb core is not yet initialized\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 	glue->status = status;
 
 	if (!glue_to_musb(glue)) {
 		pr_err("%s: musb core is not yet ready\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 
 	schedule_work(&glue->omap_musb_mailbox_work);
+
+	return 0;
 }
 
 static void omap_musb_set_mailbox(struct omap2430_glue *glue)
@@ -259,6 +207,13 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 	struct usb_otg *otg = musb->xceiv->otg;
+	bool cable_connected;
+
+	cable_connected = ((glue->status == MUSB_ID_GROUND) ||
+			   (glue->status == MUSB_VBUS_VALID));
+
+	if (cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
 
 	switch (glue->status) {
 	case MUSB_ID_GROUND:
@@ -268,7 +223,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
 		musb->xceiv->last_event = USB_EVENT_ID;
 		if (musb->gadget_driver) {
-			pm_runtime_get_sync(dev);
 			omap_control_usb_set_mode(glue->control_otghs,
 				USB_MODE_HOST);
 			omap2430_musb_set_vbus(musb, 1);
@@ -281,8 +235,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		otg->default_a = false;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		musb->xceiv->last_event = USB_EVENT_VBUS;
-		if (musb->gadget_driver)
-			pm_runtime_get_sync(dev);
 		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
 		break;
 
@@ -291,11 +243,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "VBUS Disconnect\n");
 
 		musb->xceiv->last_event = USB_EVENT_NONE;
-		if (musb->gadget_driver) {
+		if (musb->gadget_driver)
 			omap2430_musb_set_vbus(musb, 0);
-			pm_runtime_mark_last_busy(dev);
-			pm_runtime_put_autosuspend(dev);
-		}
 
 		if (data->interface_type == MUSB_INTERFACE_UTMI)
 			otg_set_vbus(musb->xceiv->otg, 0);
@@ -307,6 +256,9 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "ID float\n");
 	}
 
+	if (!cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
+
 	atomic_notifier_call_chain(&musb->xceiv->notifier,
 			musb->xceiv->last_event, NULL);
 }
@@ -316,13 +268,8 @@ static void omap_musb_mailbox_work(struct work_struct *mailbox_work)
 {
 	struct omap2430_glue *glue = container_of(mailbox_work,
 				struct omap2430_glue, omap_musb_mailbox_work);
-	struct musb *musb = glue_to_musb(glue);
-	struct device *dev = musb->controller;
 
-	pm_runtime_get_sync(dev);
 	omap_musb_set_mailbox(glue);
-	pm_runtime_mark_last_busy(dev);
-	pm_runtime_put_autosuspend(dev);
 }
 
 static irqreturn_t omap2430_musb_interrupt(int irq, void *__hci)
@@ -389,23 +336,7 @@ static int omap2430_musb_init(struct musb *musb)
 		return PTR_ERR(musb->phy);
 	}
 	musb->isr = omap2430_musb_interrupt;
-
-	/*
-	 * Enable runtime PM for musb parent (this driver). We can't
-	 * do it earlier as struct musb is not yet allocated and we
-	 * need to touch the musb registers for runtime PM.
-	 */
-	pm_runtime_enable(glue->dev);
-	status = pm_runtime_get_sync(glue->dev);
-	if (status < 0)
-		goto err1;
-
-	status = pm_runtime_get_sync(dev);
-	if (status < 0) {
-		dev_err(dev, "pm_runtime_get_sync FAILED %d\n", status);
-		pm_runtime_put_sync(glue->dev);
-		goto err1;
-	}
+	phy_init(musb->phy);
 
 	l = musb_readl(musb->mregs, OTG_INTERFSEL);
 
@@ -427,20 +358,10 @@ static int omap2430_musb_init(struct musb *musb)
 			musb_readl(musb->mregs, OTG_INTERFSEL),
 			musb_readl(musb->mregs, OTG_SIMENABLE));
 
-	setup_timer(&musb_idle_timer, musb_do_idle, (unsigned long) musb);
-
 	if (glue->status != MUSB_UNKNOWN)
 		omap_musb_set_mailbox(glue);
 
-	phy_init(musb->phy);
-	phy_power_on(musb->phy);
-
-	pm_runtime_put_noidle(musb->controller);
-	pm_runtime_put_noidle(glue->dev);
 	return 0;
-
-err1:
-	return status;
 }
 
 static void omap2430_musb_enable(struct musb *musb)
@@ -452,6 +373,11 @@ static void omap2430_musb_enable(struct musb *musb)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_on(musb->phy);
+
+	omap2430_set_power(musb, true, glue->cable_connected);
+
 	switch (glue->status) {
 
 	case MUSB_ID_GROUND:
@@ -487,18 +413,25 @@ static void omap2430_musb_disable(struct musb *musb)
 	struct device *dev = musb->controller;
 	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_off(musb->phy);
+
 	if (glue->status != MUSB_UNKNOWN)
 		omap_control_usb_set_mode(glue->control_otghs,
 			USB_MODE_DISCONNECT);
+
+	omap2430_set_power(musb, false, glue->cable_connected);
 }
 
 static int omap2430_musb_exit(struct musb *musb)
 {
-	del_timer_sync(&musb_idle_timer);
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
 	omap2430_low_level_exit(musb);
-	phy_power_off(musb->phy);
 	phy_exit(musb->phy);
+	musb->phy = NULL;
+	cancel_work_sync(&glue->omap_musb_mailbox_work);
 
 	return 0;
 }
@@ -512,9 +445,6 @@ static const struct musb_platform_ops omap2430_ops = {
 	.init		= omap2430_musb_init,
 	.exit		= omap2430_musb_exit,
 
-	.set_mode	= omap2430_musb_set_mode,
-	.try_idle	= omap2430_musb_try_idle,
-
 	.set_vbus	= omap2430_musb_set_vbus,
 
 	.enable		= omap2430_musb_enable,
@@ -639,11 +569,9 @@ static int omap2430_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
-	/*
-	 * Note that we cannot enable PM runtime yet for this
-	 * driver as we need struct musb initialized first.
-	 * See omap2430_musb_init above.
-	 */
+	pm_runtime_enable(glue->dev);
+	pm_runtime_use_autosuspend(glue->dev);
+	pm_runtime_set_autosuspend_delay(glue->dev, 500);
 
 	ret = platform_device_add(musb);
 	if (ret) {
@@ -662,12 +590,14 @@ err0:
 
 static int omap2430_remove(struct platform_device *pdev)
 {
-	struct omap2430_glue		*glue = platform_get_drvdata(pdev);
+	struct omap2430_glue *glue = platform_get_drvdata(pdev);
+	struct musb *musb = glue_to_musb(glue);
 
 	pm_runtime_get_sync(glue->dev);
-	cancel_work_sync(&glue->omap_musb_mailbox_work);
 	platform_device_unregister(glue->musb);
+	omap2430_set_power(musb, false, false);
 	pm_runtime_put_sync(glue->dev);
+	pm_runtime_dont_use_autosuspend(glue->dev);
 	pm_runtime_disable(glue->dev);
 
 	return 0;
@@ -680,12 +610,13 @@ static int omap2430_runtime_suspend(struct device *dev)
 	struct omap2430_glue		*glue = dev_get_drvdata(dev);
 	struct musb			*musb = glue_to_musb(glue);
 
-	if (musb) {
-		musb->context.otg_interfsel = musb_readl(musb->mregs,
-				OTG_INTERFSEL);
+	if (!musb)
+		return 0;
 
-		omap2430_low_level_exit(musb);
-	}
+	musb->context.otg_interfsel = musb_readl(musb->mregs,
+						 OTG_INTERFSEL);
+
+	omap2430_low_level_exit(musb);
 
 	return 0;
 }
@@ -696,7 +627,7 @@ static int omap2430_runtime_resume(struct device *dev)
 	struct musb			*musb = glue_to_musb(glue);
 
 	if (!musb)
-		return -EPROBE_DEFER;
+		return 0;
 
 	omap2430_low_level_init(musb);
 	musb_writel(musb->mregs, OTG_INTERFSEL,
@@ -738,18 +669,8 @@ static struct platform_driver omap2430_driver = {
 	},
 };
 
+module_platform_driver(omap2430_driver);
+
 MODULE_DESCRIPTION("OMAP2PLUS MUSB Glue Layer");
 MODULE_AUTHOR("Felipe Balbi <balbi@ti.com>");
 MODULE_LICENSE("GPL v2");
-
-static int __init omap2430_init(void)
-{
-	return platform_driver_register(&omap2430_driver);
-}
-subsys_initcall(omap2430_init);
-
-static void __exit omap2430_exit(void)
-{
-	platform_driver_unregister(&omap2430_driver);
-}
-module_exit(omap2430_exit);
diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c
index fdab4232cfbf..76500515dd8b 100644
--- a/drivers/usb/musb/sunxi.c
+++ b/drivers/usb/musb/sunxi.c
@@ -80,7 +80,8 @@ static struct musb *sunxi_musb;
 
 struct sunxi_glue {
 	struct device		*dev;
-	struct platform_device	*musb;
+	struct musb		*musb;
+	struct platform_device	*musb_pdev;
 	struct clk		*clk;
 	struct reset_control	*rst;
 	struct phy		*phy;
@@ -102,7 +103,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		return;
 
 	if (test_and_clear_bit(SUNXI_MUSB_FL_HOSTMODE_PEND, &glue->flags)) {
-		struct musb *musb = platform_get_drvdata(glue->musb);
+		struct musb *musb = glue->musb;
 		unsigned long flags;
 		u8 devctl;
 
@@ -112,7 +113,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		if (test_bit(SUNXI_MUSB_FL_HOSTMODE, &glue->flags)) {
 			set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
 			musb->xceiv->otg->default_a = 1;
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			MUSB_HST_MODE(musb);
 			devctl |= MUSB_DEVCTL_SESSION;
 		} else {
@@ -145,10 +146,12 @@ static void sunxi_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
-	if (is_on)
+	if (is_on) {
 		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	else
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	} else {
 		clear_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+	}
 
 	schedule_work(&glue->work);
 }
@@ -264,15 +267,6 @@ static int sunxi_musb_init(struct musb *musb)
 	if (ret)
 		goto error_unregister_notifier;
 
-	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
-		ret = phy_power_on(glue->phy);
-		if (ret)
-			goto error_phy_exit;
-		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
-		/* Stop musb work from turning vbus off again */
-		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	}
-
 	musb->isr = sunxi_musb_interrupt;
 
 	/* Stop the musb-core from doing runtime pm (not supported on sunxi) */
@@ -280,8 +274,6 @@ static int sunxi_musb_init(struct musb *musb)
 
 	return 0;
 
-error_phy_exit:
-	phy_exit(glue->phy);
 error_unregister_notifier:
 	if (musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
 		extcon_unregister_notifier(glue->extcon, EXTCON_USB_HOST,
@@ -323,10 +315,31 @@ static int sunxi_musb_exit(struct musb *musb)
 	return 0;
 }
 
+static int sunxi_set_mode(struct musb *musb, u8 mode)
+{
+	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
+	int ret;
+
+	if (mode == MUSB_HOST) {
+		ret = phy_power_on(glue->phy);
+		if (ret)
+			return ret;
+
+		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
+		/* Stop musb work from turning vbus off again */
+		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	}
+
+	return 0;
+}
+
 static void sunxi_musb_enable(struct musb *musb)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
+	glue->musb = musb;
+
 	/* musb_core does not call us in a balanced manner */
 	if (test_and_set_bit(SUNXI_MUSB_FL_ENABLED, &glue->flags))
 		return;
@@ -569,6 +582,7 @@ static const struct musb_platform_ops sunxi_musb_ops = {
 	.exit		= sunxi_musb_exit,
 	.enable		= sunxi_musb_enable,
 	.disable	= sunxi_musb_disable,
+	.set_mode	= sunxi_set_mode,
 	.fifo_offset	= sunxi_musb_fifo_offset,
 	.ep_offset	= sunxi_musb_ep_offset,
 	.busctl_offset	= sunxi_musb_busctl_offset,
@@ -721,9 +735,9 @@ static int sunxi_musb_probe(struct platform_device *pdev)
 	pinfo.data	= &pdata;
 	pinfo.size_data = sizeof(pdata);
 
-	glue->musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(glue->musb)) {
-		ret = PTR_ERR(glue->musb);
+	glue->musb_pdev = platform_device_register_full(&pinfo);
+	if (IS_ERR(glue->musb_pdev)) {
+		ret = PTR_ERR(glue->musb_pdev);
 		dev_err(&pdev->dev, "Error registering musb dev: %d\n", ret);
 		goto err_unregister_usb_phy;
 	}
@@ -740,7 +754,7 @@ static int sunxi_musb_remove(struct platform_device *pdev)
 	struct sunxi_glue *glue = platform_get_drvdata(pdev);
 	struct platform_device *usb_phy = glue->usb_phy;
 
-	platform_device_unregister(glue->musb); /* Frees glue ! */
+	platform_device_unregister(glue->musb_pdev);
 	usb_phy_generic_unregister(usb_phy);
 
 	return 0;
diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c
index 24e2b3cf1867..a72e8d670adc 100644
--- a/drivers/usb/phy/phy-twl6030-usb.c
+++ b/drivers/usb/phy/phy-twl6030-usb.c
@@ -97,6 +97,9 @@ struct twl6030_usb {
 
 	struct regulator		*usb3v3;
 
+	/* used to check initial cable status after probe */
+	struct delayed_work	get_status_work;
+
 	/* used to set vbus, in atomic path */
 	struct work_struct	set_vbus_work;
 
@@ -227,12 +230,16 @@ static irqreturn_t twl6030_usb_irq(int irq, void *_twl)
 			twl->asleep = 1;
 			status = MUSB_VBUS_VALID;
 			twl->linkstat = status;
-			musb_mailbox(status);
+			ret = musb_mailbox(status);
+			if (ret)
+				twl->linkstat = MUSB_UNKNOWN;
 		} else {
 			if (twl->linkstat != MUSB_UNKNOWN) {
 				status = MUSB_VBUS_OFF;
 				twl->linkstat = status;
-				musb_mailbox(status);
+				ret = musb_mailbox(status);
+				if (ret)
+					twl->linkstat = MUSB_UNKNOWN;
 				if (twl->asleep) {
 					regulator_disable(twl->usb3v3);
 					twl->asleep = 0;
@@ -264,7 +271,9 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_SET);
 		status = MUSB_ID_GROUND;
 		twl->linkstat = status;
-		musb_mailbox(status);
+		ret = musb_mailbox(status);
+		if (ret)
+			twl->linkstat = MUSB_UNKNOWN;
 	} else  {
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_CLR);
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -274,6 +283,15 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 	return IRQ_HANDLED;
 }
 
+static void twl6030_status_work(struct work_struct *work)
+{
+	struct twl6030_usb *twl = container_of(work, struct twl6030_usb,
+					       get_status_work.work);
+
+	twl6030_usb_irq(twl->irq2, twl);
+	twl6030_usbotg_irq(twl->irq1, twl);
+}
+
 static int twl6030_enable_irq(struct twl6030_usb *twl)
 {
 	twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -284,8 +302,6 @@ static int twl6030_enable_irq(struct twl6030_usb *twl)
 				REG_INT_MSK_LINE_C);
 	twl6030_interrupt_unmask(TWL6030_CHARGER_CTRL_INT_MASK,
 				REG_INT_MSK_STS_C);
-	twl6030_usb_irq(twl->irq2, twl);
-	twl6030_usbotg_irq(twl->irq1, twl);
 
 	return 0;
 }
@@ -371,6 +387,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "could not create sysfs file\n");
 
 	INIT_WORK(&twl->set_vbus_work, otg_set_vbus_work);
+	INIT_DELAYED_WORK(&twl->get_status_work, twl6030_status_work);
 
 	status = request_threaded_irq(twl->irq1, NULL, twl6030_usbotg_irq,
 			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | IRQF_ONESHOT,
@@ -395,6 +412,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 
 	twl->asleep = 0;
 	twl6030_enable_irq(twl);
+	schedule_delayed_work(&twl->get_status_work, HZ);
 	dev_info(&pdev->dev, "Initialized TWL6030 USB module\n");
 
 	return 0;
@@ -404,6 +422,7 @@ static int twl6030_usb_remove(struct platform_device *pdev)
 {
 	struct twl6030_usb *twl = platform_get_drvdata(pdev);
 
+	cancel_delayed_work(&twl->get_status_work);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
 		REG_INT_MSK_LINE_C);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 2eddbe538cda..5608af4a369d 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -2007,6 +2007,7 @@ static void mos7720_release(struct usb_serial *serial)
 				    urblist_entry)
 			usb_unlink_urb(urbtrack->urb);
 		spin_unlock_irqrestore(&mos_parport->listlock, flags);
+		parport_del_port(mos_parport->pp);
 
 		kref_put(&mos_parport->ref_count, destroy_mos_parport);
 	}
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 4d49fce406e1..5ef014ba6ae8 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -836,6 +836,7 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
+	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
 
@@ -848,7 +849,6 @@ static struct scsi_host_template uas_host_template = {
 	.slave_configure = uas_slave_configure,
 	.eh_abort_handler = uas_eh_abort_handler,
 	.eh_bus_reset_handler = uas_eh_bus_reset_handler,
-	.can_queue = MAX_CMNDS,
 	.this_id = -1,
 	.sg_tablesize = SG_NONE,
 	.skip_settle_delay = 1,
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index fca51105974e..2e0450bec1b1 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -941,7 +941,7 @@ static void vhci_stop(struct usb_hcd *hcd)
 
 static int vhci_get_frame_number(struct usb_hcd *hcd)
 {
-	pr_err("Not yet implemented\n");
+	dev_err_ratelimited(&hcd->self.root_hub->dev, "Not yet implemented\n");
 	return 0;
 }
 
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index b54f26c55dfd..b4b3e256491b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -746,7 +746,7 @@ config ALIM7101_WDT
 
 config EBC_C384_WDT
 	tristate "WinSystems EBC-C384 Watchdog Timer"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	select WATCHDOG_CORE
 	help
 	  Enables watchdog timer support for the watchdog timer on the
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d46839f51e73..e4db19e88ab1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
-static void release_memory_resource(struct resource *resource);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state)
 }
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static void release_memory_resource(struct resource *resource)
+{
+	if (!resource)
+		return;
+
+	/*
+	 * No need to reset region to identity mapped since we now
+	 * know that no I/O can be in this region
+	 */
+	release_resource(resource);
+	kfree(resource);
+}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
 	struct resource *res;
@@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	return res;
 }
 
-static void release_memory_resource(struct resource *resource)
-{
-	if (!resource)
-		return;
-
-	/*
-	 * No need to reset region to identity mapped since we now
-	 * know that no I/O can be in this region
-	 */
-	release_resource(resource);
-	kfree(resource);
-}
-
 static enum bp_state reserve_additional_memory(void)
 {
 	long credit;
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 076970a54f89..4ce10bcca18b 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -423,36 +423,7 @@ upload:
 
 	return 0;
 }
-static int __init check_prereq(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (!xen_initial_domain())
-		return -ENODEV;
-
-	if (!acpi_gbl_FADT.smi_command)
-		return -ENODEV;
-
-	if (c->x86_vendor == X86_VENDOR_INTEL) {
-		if (!cpu_has(c, X86_FEATURE_EST))
-			return -ENODEV;
 
-		return 0;
-	}
-	if (c->x86_vendor == X86_VENDOR_AMD) {
-		/* Copied from powernow-k8.h, can't include ../cpufreq/powernow
-		 * as we get compile warnings for the static functions.
-		 */
-#define CPUID_FREQ_VOLT_CAPABILITIES    0x80000007
-#define USE_HW_PSTATE                   0x00000080
-		u32 eax, ebx, ecx, edx;
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE)
-			return -ENODEV;
-		return 0;
-	}
-	return -ENODEV;
-}
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance __percpu *acpi_perf_data;
 
@@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor_resume_nb = {
 static int __init xen_acpi_processor_init(void)
 {
 	unsigned int i;
-	int rc = check_prereq();
+	int rc;
 
-	if (rc)
-		return rc;
+	if (!xen_initial_domain())
+		return -ENODEV;
 
 	nr_acpi_bits = get_max_acpi_id() + 1;
 	acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL);
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 8e67336f8ddd..6a25533da237 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			err = conf_space_read(dev, cfg_entry, field_start,
 					      &tmp_val);
 			if (err)
@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			tmp_val = 0;
 
 			err = xen_pcibk_config_read(dev, field_start,
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index ad3d17d29c81..9ead1c2ff1dd 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -145,7 +145,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 	/* A write to obtain the length must happen as a 32-bit write.
 	 * This does not (yet) support writing individual bytes
 	 */
-	if (value == ~PCI_ROM_ADDRESS_ENABLE)
+	if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U)
 		bar->which = 1;
 	else {
 		u32 tmpval;
@@ -225,38 +225,42 @@ static inline void read_dev_bar(struct pci_dev *dev,
 			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
 				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
 			bar_info->val = res[pos - 1].start >> 32;
-			bar_info->len_val = res[pos - 1].end >> 32;
+			bar_info->len_val = -resource_size(&res[pos - 1]) >> 32;
 			return;
 		}
 	}
 
+	if (!res[pos].flags ||
+	    (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET |
+			       IORESOURCE_BUSY)))
+		return;
+
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = resource_size(&res[pos]);
+	bar_info->len_val = -resource_size(&res[pos]) |
+			    (res[pos].flags & PCI_REGION_FLAG_MASK);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~0);
-	bar->which = 0;
 
 	return bar;
 }
 
 static void *rom_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-	bar->which = 0;
 
 	return bar;
 }
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index cacf30d14747..7487971f9f78 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -316,11 +316,18 @@ static int xenbus_write_transaction(unsigned msg_type,
 			rc = -ENOMEM;
 			goto out;
 		}
+	} else {
+		list_for_each_entry(trans, &u->transactions, list)
+			if (trans->handle.id == u->u.msg.tx_id)
+				break;
+		if (&trans->list == &u->transactions)
+			return -ESRCH;
 	}
 
 	reply = xenbus_dev_request_and_reply(&u->u.msg);
 	if (IS_ERR(reply)) {
-		kfree(trans);
+		if (msg_type == XS_TRANSACTION_START)
+			kfree(trans);
 		rc = PTR_ERR(reply);
 		goto out;
 	}
@@ -333,12 +340,7 @@ static int xenbus_write_transaction(unsigned msg_type,
 			list_add(&trans->list, &u->transactions);
 		}
 	} else if (u->u.msg.type == XS_TRANSACTION_END) {
-		list_for_each_entry(trans, &u->transactions, list)
-			if (trans->handle.id == u->u.msg.tx_id)
-				break;
-		BUG_ON(&trans->list == &u->transactions);
 		list_del(&trans->list);
-
 		kfree(trans);
 	}
 
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 374b12af8812..22f7cd711c57 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -232,10 +232,10 @@ static void transaction_resume(void)
 void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 {
 	void *ret;
-	struct xsd_sockmsg req_msg = *msg;
+	enum xsd_sockmsg_type type = msg->type;
 	int err;
 
-	if (req_msg.type == XS_TRANSACTION_START)
+	if (type == XS_TRANSACTION_START)
 		transaction_start();
 
 	mutex_lock(&xs_state.request_mutex);
@@ -249,12 +249,8 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 
 	mutex_unlock(&xs_state.request_mutex);
 
-	if (IS_ERR(ret))
-		return ret;
-
 	if ((msg->type == XS_TRANSACTION_END) ||
-	    ((req_msg.type == XS_TRANSACTION_START) &&
-	     (msg->type == XS_ERROR)))
+	    ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR)))
 		transaction_end();
 
 	return ret;
author	Daniel Vetter <daniel.vetter@ffwll.ch>	2016-08-05 11:36:15 +0300
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2016-08-05 11:36:15 +0300
commit	94558e265b9539b2ecec98d037bae51c902663c1 (patch)
tree	e2782bc903759c2a8bf0af9b3a8ec090a3b1bd3e /drivers
parent	5ac9056753e79ac5ad1ccc3c99b311688e46e8c9 (diff)
parent	1cf915d305b6e1d57db6c35c208016f9747ba3c6 (diff)
download	linux-94558e265b9539b2ecec98d037bae51c902663c1.tar.xz