55 files changed, 7540 insertions, 644 deletions
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index 30879df3daea..d8a22c2a579d 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -1 +1,2 @@
 obj-y			+= drm/ vga/
+obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 19b8e0d5d910..b16c50ee769c 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -215,8 +215,6 @@ source "drivers/gpu/drm/cirrus/Kconfig"
 
 source "drivers/gpu/drm/shmobile/Kconfig"
 
-source "drivers/gpu/drm/tegra/Kconfig"
-
 source "drivers/gpu/drm/omapdrm/Kconfig"
 
 source "drivers/gpu/drm/tilcdc/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 6a4211521011..1c9f24396002 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -49,7 +49,6 @@ obj-$(CONFIG_DRM_GMA500) += gma500/
 obj-$(CONFIG_DRM_UDL) += udl/
 obj-$(CONFIG_DRM_AST) += ast/
 obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
-obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-$(CONFIG_DRM_OMAP)	+= omapdrm/
 obj-$(CONFIG_DRM_TILCDC)	+= tilcdc/
 obj-$(CONFIG_DRM_QXL) += qxl/
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
deleted file mode 100644
index 80f73d1315d0..000000000000
--- a/drivers/gpu/drm/tegra/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-ccflags-y := -Iinclude/drm
-ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
-
-tegra-drm-y := drm.o fb.o dc.o host1x.o
-tegra-drm-y += output.o rgb.o hdmi.o
-
-obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
deleted file mode 100644
index 9d452df5bcad..000000000000
--- a/drivers/gpu/drm/tegra/drm.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-
-#include <linux/dma-mapping.h>
-#include <asm/dma-iommu.h>
-
-#include "drm.h"
-
-#define DRIVER_NAME "tegra"
-#define DRIVER_DESC "NVIDIA Tegra graphics"
-#define DRIVER_DATE "20120330"
-#define DRIVER_MAJOR 0
-#define DRIVER_MINOR 0
-#define DRIVER_PATCHLEVEL 0
-
-static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
-{
-	struct device *dev = drm->dev;
-	struct host1x *host1x;
-	int err;
-
-	host1x = dev_get_drvdata(dev);
-	drm->dev_private = host1x;
-	host1x->drm = drm;
-
-	drm_mode_config_init(drm);
-
-	err = host1x_drm_init(host1x, drm);
-	if (err < 0)
-		return err;
-
-	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
-	if (err < 0)
-		return err;
-
-	err = tegra_drm_fb_init(drm);
-	if (err < 0)
-		return err;
-
-	drm_kms_helper_poll_init(drm);
-
-	return 0;
-}
-
-static int tegra_drm_unload(struct drm_device *drm)
-{
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_exit(drm);
-
-	drm_mode_config_cleanup(drm);
-
-	return 0;
-}
-
-static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
-{
-	return 0;
-}
-
-static void tegra_drm_lastclose(struct drm_device *drm)
-{
-	struct host1x *host1x = drm->dev_private;
-
-	drm_fbdev_cma_restore_mode(host1x->fbdev);
-}
-
-static struct drm_ioctl_desc tegra_drm_ioctls[] = {
-};
-
-static const struct file_operations tegra_drm_fops = {
-	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_cma_mmap,
-	.poll = drm_poll,
-	.fasync = drm_fasync,
-	.read = drm_read,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = drm_compat_ioctl,
-#endif
-	.llseek = noop_llseek,
-};
-
-static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head) {
-		struct tegra_dc *dc = to_tegra_dc(crtc);
-
-		if (dc->pipe == pipe)
-			return crtc;
-	}
-
-	return NULL;
-}
-
-static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc)
-{
-	/* TODO: implement real hardware counter using syncpoints */
-	return drm_vblank_count(dev, crtc);
-}
-
-static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	if (!crtc)
-		return -ENODEV;
-
-	tegra_dc_enable_vblank(dc);
-
-	return 0;
-}
-
-static void tegra_drm_disable_vblank(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	if (crtc)
-		tegra_dc_disable_vblank(dc);
-}
-
-static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
-		tegra_dc_cancel_page_flip(crtc, file);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int tegra_debugfs_framebuffers(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct drm_device *drm = node->minor->dev;
-	struct drm_framebuffer *fb;
-
-	mutex_lock(&drm->mode_config.fb_lock);
-
-	list_for_each_entry(fb, &drm->mode_config.fb_list, head) {
-		seq_printf(s, "%3d: user size: %d x %d, depth %d, %d bpp, refcount %d\n",
-			   fb->base.id, fb->width, fb->height, fb->depth,
-			   fb->bits_per_pixel,
-			   atomic_read(&fb->refcount.refcount));
-	}
-
-	mutex_unlock(&drm->mode_config.fb_lock);
-
-	return 0;
-}
-
-static struct drm_info_list tegra_debugfs_list[] = {
-	{ "framebuffers", tegra_debugfs_framebuffers, 0 },
-};
-
-static int tegra_debugfs_init(struct drm_minor *minor)
-{
-	return drm_debugfs_create_files(tegra_debugfs_list,
-					ARRAY_SIZE(tegra_debugfs_list),
-					minor->debugfs_root, minor);
-}
-
-static void tegra_debugfs_cleanup(struct drm_minor *minor)
-{
-	drm_debugfs_remove_files(tegra_debugfs_list,
-				 ARRAY_SIZE(tegra_debugfs_list), minor);
-}
-#endif
-
-struct drm_driver tegra_drm_driver = {
-	.driver_features = DRIVER_BUS_PLATFORM | DRIVER_MODESET | DRIVER_GEM,
-	.load = tegra_drm_load,
-	.unload = tegra_drm_unload,
-	.open = tegra_drm_open,
-	.preclose = tegra_drm_preclose,
-	.lastclose = tegra_drm_lastclose,
-
-	.get_vblank_counter = tegra_drm_get_vblank_counter,
-	.enable_vblank = tegra_drm_enable_vblank,
-	.disable_vblank = tegra_drm_disable_vblank,
-
-#if defined(CONFIG_DEBUG_FS)
-	.debugfs_init = tegra_debugfs_init,
-	.debugfs_cleanup = tegra_debugfs_cleanup,
-#endif
-
-	.gem_free_object = drm_gem_cma_free_object,
-	.gem_vm_ops = &drm_gem_cma_vm_ops,
-	.dumb_create = drm_gem_cma_dumb_create,
-	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
-	.dumb_destroy = drm_gem_cma_dumb_destroy,
-
-	.ioctls = tegra_drm_ioctls,
-	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
-	.fops = &tegra_drm_fops,
-
-	.name = DRIVER_NAME,
-	.desc = DRIVER_DESC,
-	.date = DRIVER_DATE,
-	.major = DRIVER_MAJOR,
-	.minor = DRIVER_MINOR,
-	.patchlevel = DRIVER_PATCHLEVEL,
-};
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
deleted file mode 100644
index 03914953cb1c..000000000000
--- a/drivers/gpu/drm/tegra/fb.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "drm.h"
-
-static void tegra_drm_fb_output_poll_changed(struct drm_device *drm)
-{
-	struct host1x *host1x = drm->dev_private;
-
-	drm_fbdev_cma_hotplug_event(host1x->fbdev);
-}
-
-static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
-	.fb_create = drm_fb_cma_create,
-	.output_poll_changed = tegra_drm_fb_output_poll_changed,
-};
-
-int tegra_drm_fb_init(struct drm_device *drm)
-{
-	struct host1x *host1x = drm->dev_private;
-	struct drm_fbdev_cma *fbdev;
-
-	drm->mode_config.min_width = 0;
-	drm->mode_config.min_height = 0;
-
-	drm->mode_config.max_width = 4096;
-	drm->mode_config.max_height = 4096;
-
-	drm->mode_config.funcs = &tegra_drm_mode_funcs;
-
-	fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
-				   drm->mode_config.num_connector);
-	if (IS_ERR(fbdev))
-		return PTR_ERR(fbdev);
-
-	host1x->fbdev = fbdev;
-
-	return 0;
-}
-
-void tegra_drm_fb_exit(struct drm_device *drm)
-{
-	struct host1x *host1x = drm->dev_private;
-
-	drm_fbdev_cma_fini(host1x->fbdev);
-}
diff --git a/drivers/gpu/drm/tegra/host1x.c b/drivers/gpu/drm/tegra/host1x.c
deleted file mode 100644
index 92e25a7e00ea..000000000000
--- a/drivers/gpu/drm/tegra/host1x.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-#include "drm.h"
-
-struct host1x_drm_client {
-	struct host1x_client *client;
-	struct device_node *np;
-	struct list_head list;
-};
-
-static int host1x_add_drm_client(struct host1x *host1x, struct device_node *np)
-{
-	struct host1x_drm_client *client;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (!client)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&client->list);
-	client->np = of_node_get(np);
-
-	list_add_tail(&client->list, &host1x->drm_clients);
-
-	return 0;
-}
-
-static int host1x_activate_drm_client(struct host1x *host1x,
-				      struct host1x_drm_client *drm,
-				      struct host1x_client *client)
-{
-	mutex_lock(&host1x->drm_clients_lock);
-	list_del_init(&drm->list);
-	list_add_tail(&drm->list, &host1x->drm_active);
-	drm->client = client;
-	mutex_unlock(&host1x->drm_clients_lock);
-
-	return 0;
-}
-
-static int host1x_remove_drm_client(struct host1x *host1x,
-				    struct host1x_drm_client *client)
-{
-	mutex_lock(&host1x->drm_clients_lock);
-	list_del_init(&client->list);
-	mutex_unlock(&host1x->drm_clients_lock);
-
-	of_node_put(client->np);
-	kfree(client);
-
-	return 0;
-}
-
-static int host1x_parse_dt(struct host1x *host1x)
-{
-	static const char * const compat[] = {
-		"nvidia,tegra20-dc",
-		"nvidia,tegra20-hdmi",
-		"nvidia,tegra30-dc",
-		"nvidia,tegra30-hdmi",
-	};
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < ARRAY_SIZE(compat); i++) {
-		struct device_node *np;
-
-		for_each_child_of_node(host1x->dev->of_node, np) {
-			if (of_device_is_compatible(np, compat[i]) &&
-			    of_device_is_available(np)) {
-				err = host1x_add_drm_client(host1x, np);
-				if (err < 0)
-					return err;
-			}
-		}
-	}
-
-	return 0;
-}
-
-static int tegra_host1x_probe(struct platform_device *pdev)
-{
-	struct host1x *host1x;
-	struct resource *regs;
-	int err;
-
-	host1x = devm_kzalloc(&pdev->dev, sizeof(*host1x), GFP_KERNEL);
-	if (!host1x)
-		return -ENOMEM;
-
-	mutex_init(&host1x->drm_clients_lock);
-	INIT_LIST_HEAD(&host1x->drm_clients);
-	INIT_LIST_HEAD(&host1x->drm_active);
-	mutex_init(&host1x->clients_lock);
-	INIT_LIST_HEAD(&host1x->clients);
-	host1x->dev = &pdev->dev;
-
-	err = host1x_parse_dt(host1x);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to parse DT: %d\n", err);
-		return err;
-	}
-
-	host1x->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(host1x->clk))
-		return PTR_ERR(host1x->clk);
-
-	err = clk_prepare_enable(host1x->clk);
-	if (err < 0)
-		return err;
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs) {
-		err = -ENXIO;
-		goto err;
-	}
-
-	err = platform_get_irq(pdev, 0);
-	if (err < 0)
-		goto err;
-
-	host1x->syncpt = err;
-
-	err = platform_get_irq(pdev, 1);
-	if (err < 0)
-		goto err;
-
-	host1x->irq = err;
-
-	host1x->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host1x->regs)) {
-		err = PTR_ERR(host1x->regs);
-		goto err;
-	}
-
-	platform_set_drvdata(pdev, host1x);
-
-	return 0;
-
-err:
-	clk_disable_unprepare(host1x->clk);
-	return err;
-}
-
-static int tegra_host1x_remove(struct platform_device *pdev)
-{
-	struct host1x *host1x = platform_get_drvdata(pdev);
-
-	clk_disable_unprepare(host1x->clk);
-
-	return 0;
-}
-
-int host1x_drm_init(struct host1x *host1x, struct drm_device *drm)
-{
-	struct host1x_client *client;
-
-	mutex_lock(&host1x->clients_lock);
-
-	list_for_each_entry(client, &host1x->clients, list) {
-		if (client->ops && client->ops->drm_init) {
-			int err = client->ops->drm_init(client, drm);
-			if (err < 0) {
-				dev_err(host1x->dev,
-					"DRM setup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&host1x->clients_lock);
-
-	return 0;
-}
-
-int host1x_drm_exit(struct host1x *host1x)
-{
-	struct platform_device *pdev = to_platform_device(host1x->dev);
-	struct host1x_client *client;
-
-	if (!host1x->drm)
-		return 0;
-
-	mutex_lock(&host1x->clients_lock);
-
-	list_for_each_entry_reverse(client, &host1x->clients, list) {
-		if (client->ops && client->ops->drm_exit) {
-			int err = client->ops->drm_exit(client);
-			if (err < 0) {
-				dev_err(host1x->dev,
-					"DRM cleanup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&host1x->clients_lock);
-
-	drm_platform_exit(&tegra_drm_driver, pdev);
-	host1x->drm = NULL;
-
-	return 0;
-}
-
-int host1x_register_client(struct host1x *host1x, struct host1x_client *client)
-{
-	struct host1x_drm_client *drm, *tmp;
-	int err;
-
-	mutex_lock(&host1x->clients_lock);
-	list_add_tail(&client->list, &host1x->clients);
-	mutex_unlock(&host1x->clients_lock);
-
-	list_for_each_entry_safe(drm, tmp, &host1x->drm_clients, list)
-		if (drm->np == client->dev->of_node)
-			host1x_activate_drm_client(host1x, drm, client);
-
-	if (list_empty(&host1x->drm_clients)) {
-		struct platform_device *pdev = to_platform_device(host1x->dev);
-
-		err = drm_platform_init(&tegra_drm_driver, pdev);
-		if (err < 0) {
-			dev_err(host1x->dev, "drm_platform_init(): %d\n", err);
-			return err;
-		}
-	}
-
-	client->host1x = host1x;
-
-	return 0;
-}
-
-int host1x_unregister_client(struct host1x *host1x,
-			     struct host1x_client *client)
-{
-	struct host1x_drm_client *drm, *tmp;
-	int err;
-
-	list_for_each_entry_safe(drm, tmp, &host1x->drm_active, list) {
-		if (drm->client == client) {
-			err = host1x_drm_exit(host1x);
-			if (err < 0) {
-				dev_err(host1x->dev, "host1x_drm_exit(): %d\n",
-					err);
-				return err;
-			}
-
-			host1x_remove_drm_client(host1x, drm);
-			break;
-		}
-	}
-
-	mutex_lock(&host1x->clients_lock);
-	list_del_init(&client->list);
-	mutex_unlock(&host1x->clients_lock);
-
-	return 0;
-}
-
-static struct of_device_id tegra_host1x_of_match[] = {
-	{ .compatible = "nvidia,tegra30-host1x", },
-	{ .compatible = "nvidia,tegra20-host1x", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, tegra_host1x_of_match);
-
-struct platform_driver tegra_host1x_driver = {
-	.driver = {
-		.name = "tegra-host1x",
-		.owner = THIS_MODULE,
-		.of_match_table = tegra_host1x_of_match,
-	},
-	.probe = tegra_host1x_probe,
-	.remove = tegra_host1x_remove,
-};
-
-static int __init tegra_host1x_init(void)
-{
-	int err;
-
-	err = platform_driver_register(&tegra_host1x_driver);
-	if (err < 0)
-		return err;
-
-	err = platform_driver_register(&tegra_dc_driver);
-	if (err < 0)
-		goto unregister_host1x;
-
-	err = platform_driver_register(&tegra_hdmi_driver);
-	if (err < 0)
-		goto unregister_dc;
-
-	return 0;
-
-unregister_dc:
-	platform_driver_unregister(&tegra_dc_driver);
-unregister_host1x:
-	platform_driver_unregister(&tegra_host1x_driver);
-	return err;
-}
-module_init(tegra_host1x_init);
-
-static void __exit tegra_host1x_exit(void)
-{
-	platform_driver_unregister(&tegra_hdmi_driver);
-	platform_driver_unregister(&tegra_dc_driver);
-	platform_driver_unregister(&tegra_host1x_driver);
-}
-module_exit(tegra_host1x_exit);
-
-MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
-MODULE_DESCRIPTION("NVIDIA Tegra DRM driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
new file mode 100644
index 000000000000..ccfd42b23606
--- /dev/null
+++ b/drivers/gpu/host1x/Kconfig
@@ -0,0 +1,24 @@
+config TEGRA_HOST1X
+	tristate "NVIDIA Tegra host1x driver"
+	depends on ARCH_TEGRA || ARCH_MULTIPLATFORM
+	help
+	  Driver for the NVIDIA Tegra host1x hardware.
+
+	  The Tegra host1x module is the DMA engine for register access to
+	  Tegra's graphics- and multimedia-related modules. The modules served
+	  by host1x are referred to as clients. host1x includes some other
+	  functionality, such as synchronization.
+
+if TEGRA_HOST1X
+
+config TEGRA_HOST1X_FIREWALL
+	bool "Enable HOST1X security firewall"
+	default y
+	help
+	  Say yes if the kernel should protect command streams from tampering.
+
+	  If unsure, choose Y.
+
+source "drivers/gpu/host1x/drm/Kconfig"
+
+endif
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
new file mode 100644
index 000000000000..3b037b6e0298
--- /dev/null
+++ b/drivers/gpu/host1x/Makefile
@@ -0,0 +1,20 @@
+ccflags-y = -Idrivers/gpu/host1x
+
+host1x-y = \
+	syncpt.o \
+	dev.o \
+	intr.o \
+	cdma.o \
+	channel.o \
+	job.o \
+	debug.o \
+	hw/host1x01.o
+
+ccflags-y += -Iinclude/drm
+ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
+
+host1x-$(CONFIG_DRM_TEGRA) += drm/drm.o drm/fb.o drm/dc.o
+host1x-$(CONFIG_DRM_TEGRA) += drm/output.o drm/rgb.o drm/hdmi.o
+host1x-$(CONFIG_DRM_TEGRA) += drm/gem.o
+host1x-$(CONFIG_DRM_TEGRA) += drm/gr2d.o
+obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
new file mode 100644
index 000000000000..de72172d3b5f
--- /dev/null
+++ b/drivers/gpu/host1x/cdma.c
@@ -0,0 +1,491 @@
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <asm/cacheflush.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kfifo.h>
+#include <linux/slab.h>
+#include <trace/events/host1x.h>
+
+#include "cdma.h"
+#include "channel.h"
+#include "dev.h"
+#include "debug.h"
+#include "host1x_bo.h"
+#include "job.h"
+
+/*
+ * push_buffer
+ *
+ * The push buffer is a circular array of words to be fetched by command DMA.
+ * Note that it works slightly differently to the sync queue; fence == pos
+ * means that the push buffer is full, not empty.
+ */
+
+#define HOST1X_PUSHBUFFER_SLOTS	512
+
+/*
+ * Free the push buffer if one was allocated (pb->mapped set) and reset state
+ */
+static void host1x_pushbuffer_destroy(struct push_buffer *pb)
+{
+	struct host1x_cdma *cdma = pb_to_cdma(pb);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	if (pb->mapped)
+		dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
+				      pb->mapped, pb->phys);
+
+	pb->mapped = NULL;
+	pb->phys = 0;
+}
+
+/*
+ * Init push buffer resources
+ */
+static int host1x_pushbuffer_init(struct push_buffer *pb)
+{
+	struct host1x_cdma *cdma = pb_to_cdma(pb);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	pb->mapped = NULL;
+	pb->phys = 0;
+	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
+
+	/* initialize buffer pointers */
+	pb->fence = pb->size_bytes - 8;
+	pb->pos = 0;
+
+	/* allocate and map pushbuffer memory */
+	pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
+					    &pb->phys, GFP_KERNEL);
+	if (!pb->mapped)
+		goto fail;
+
+	host1x_hw_pushbuffer_init(host1x, pb);
+
+	return 0;
+
+fail:
+	host1x_pushbuffer_destroy(pb);
+	return -ENOMEM;
+}
+
+/*
+ * Write op1/op2 into the slot at byte offset pb->pos and advance pos by one
+ * slot, wrapping at size_bytes. Caller must ensure push buffer is not full.
+ */
+static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
+{
+	u32 pos = pb->pos;
+	u32 *p = (u32 *)((void *)pb->mapped + pos);
+	WARN_ON(pos == pb->fence);
+	*(p++) = op1;
+	*(p++) = op2;
+	pb->pos = (pos + 8) & (pb->size_bytes - 1);
+}
+
+/*
+ * Pop a number of two word slots from the push buffer
+ * Caller must ensure push buffer is not empty
+ */
+static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
+{
+	/* Advance the fence by 8 bytes per slot, wrapping at size_bytes */
+	pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1);
+}
+
+/*
+ * Return the number of two word slots free in the push buffer
+ */
+static u32 host1x_pushbuffer_space(struct push_buffer *pb)
+{
+	return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8;
+}
+
+/*
+ * Sleep (if necessary) until the requested event happens
+ *   - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
+ *     - Returns 1
+ *   - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
+ *     - Return the amount of space (> 0)
+ * Must be called with the cdma lock held.
+ */
+unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
+				     enum cdma_event event)
+{
+	for (;;) {
+		unsigned int space;
+
+		if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
+			space = list_empty(&cdma->sync_queue) ? 1 : 0;
+		else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
+			struct push_buffer *pb = &cdma->push_buffer;
+			space = host1x_pushbuffer_space(pb);
+		} else {
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		if (space)
+			return space;
+
+		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
+				       event);
+
+		/* If somebody has managed to already start waiting, yield */
+		if (cdma->event != CDMA_EVENT_NONE) {
+			mutex_unlock(&cdma->lock);
+			schedule();
+			mutex_lock(&cdma->lock);
+			continue;
+		}
+		cdma->event = event;
+
+		mutex_unlock(&cdma->lock);
+		down(&cdma->sem);
+		mutex_lock(&cdma->lock);
+	}
+	return 0;
+}
+
+/*
+ * Start timer that tracks the time spent by the job.
+ * Must be called with the cdma lock held.
+ */
+static void cdma_start_timer_locked(struct host1x_cdma *cdma,
+				    struct host1x_job *job)
+{
+	struct host1x *host = cdma_to_host1x(cdma);
+
+	if (cdma->timeout.client) {
+		/* timer already started */
+		return;
+	}
+
+	cdma->timeout.client = job->client;
+	cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
+	cdma->timeout.syncpt_val = job->syncpt_end;
+	cdma->timeout.start_ktime = ktime_get();
+
+	schedule_delayed_work(&cdma->timeout.wq,
+			      msecs_to_jiffies(job->timeout));
+}
+
+/*
+ * Stop timer when a buffer submission completes.
+ * Must be called with the cdma lock held.
+ */
+static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
+{
+	cancel_delayed_work(&cdma->timeout.wq);
+	cdma->timeout.client = 0;
+}
+
+/*
+ * For all sync queue entries that have already finished according to the
+ * current sync point registers:
+ *  - unpin & unref their mems
+ *  - pop their push buffer slots
+ *  - remove them from the sync queue
+ * This is normally called from the host code's worker thread, but can be
+ * called manually if necessary.
+ * Must be called with the cdma lock held.
+ */
+static void update_cdma_locked(struct host1x_cdma *cdma)
+{
+	bool signal = false;
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_job *job, *n;
+
+	/* If CDMA is stopped, queue is cleared and we can return */
+	if (!cdma->running)
+		return;
+
+	/*
+	 * Walk the sync queue, reading the sync point registers as necessary,
+	 * to consume as many sync queue entries as possible without blocking
+	 */
+	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
+		struct host1x_syncpt *sp =
+			host1x_syncpt_get(host1x, job->syncpt_id);
+
+		/* Check whether this syncpt has completed, and bail if not */
+		if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+			/* Start timer on next pending syncpt */
+			if (job->timeout)
+				cdma_start_timer_locked(cdma, job);
+			break;
+		}
+
+		/* Cancel timeout, when a buffer completes */
+		if (cdma->timeout.client)
+			stop_cdma_timer_locked(cdma);
+
+		/* Unpin the memory */
+		host1x_job_unpin(job);
+
+		/* Pop push buffer slots */
+		if (job->num_slots) {
+			struct push_buffer *pb = &cdma->push_buffer;
+			host1x_pushbuffer_pop(pb, job->num_slots);
+			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
+				signal = true;
+		}
+
+		list_del(&job->list);
+		host1x_job_put(job);
+	}
+
+	if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
+	    list_empty(&cdma->sync_queue))
+		signal = true;
+
+	if (signal) {
+		cdma->event = CDMA_EVENT_NONE;
+		up(&cdma->sem);
+	}
+}
+
+void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
+				   struct device *dev)
+{
+	u32 restart_addr;
+	u32 syncpt_incrs;
+	struct host1x_job *job = NULL;
+	u32 syncpt_val;
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
+
+	dev_dbg(dev, "%s: starting cleanup (thresh %d)\n",
+		__func__, syncpt_val);
+
+	/*
+	 * Move the sync_queue read pointer to the first entry that hasn't
+	 * completed based on the current HW syncpt value. It's likely there
+	 * won't be any (i.e. we're still at the head), but covers the case
+	 * where a syncpt incr happens just prior/during the teardown.
+	 */
+
+	dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n",
+		__func__);
+
+	list_for_each_entry(job, &cdma->sync_queue, list) {
+		if (syncpt_val < job->syncpt_end)
+			break;
+
+		host1x_job_dump(dev, job);
+	}
+
+	/*
+	 * Walk the sync_queue, first incrementing with the CPU syncpts that
+	 * are partially executed (the first buffer) or fully skipped while
+	 * still in the current context (slots are also NOP-ed).
+	 *
+	 * At the point contexts are interleaved, syncpt increments must be
+	 * done inline with the pushbuffer from a GATHER buffer to maintain
+	 * the order (slots are modified to be a GATHER of syncpt incrs).
+	 *
+	 * Note: save in restart_addr the location where the timed out buffer
+	 * started in the PB, so we can start the refetch from there (with the
+	 * modified NOP-ed PB slots). This lets things appear to have completed
+	 * properly for this buffer and resources are freed.
+	 */
+
+	dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n",
+		__func__);
+	/* job is only a valid entry if the scan above broke out early */
+	if (&job->list != &cdma->sync_queue)
+		restart_addr = job->first_get;
+	else
+		restart_addr = cdma->last_pos;
+
+	/* do CPU increments as long as this context continues */
+	list_for_each_entry_from(job, &cdma->sync_queue, list) {
+		/* different context, gets us out of this loop */
+		if (job->client != cdma->timeout.client)
+			break;
+
+		/* won't need a timeout when replayed */
+		job->timeout = 0;
+
+		syncpt_incrs = job->syncpt_end - syncpt_val;
+		dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs);
+
+		host1x_job_dump(dev, job);
+
+		/* safe to use CPU to incr syncpts */
+		host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
+						syncpt_incrs, job->syncpt_end,
+						job->num_slots);
+
+		syncpt_val += syncpt_incrs;
+	}
+
+	/* The following submits from the same client may be dependent on the
+	 * failed submit and therefore they may fail. Force a small timeout
+	 * to make the queue cleanup faster */
+
+	list_for_each_entry_from(job, &cdma->sync_queue, list)
+		if (job->client == cdma->timeout.client)
+			job->timeout = min_t(unsigned int, job->timeout, 500);
+
+	dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);
+
+	/* roll back DMAGET and start up channel again */
+	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
+}
+
+/*
+ * Create a cdma
+ */
+int host1x_cdma_init(struct host1x_cdma *cdma)
+{
+	int err;
+
+	mutex_init(&cdma->lock);
+	sema_init(&cdma->sem, 0);
+
+	INIT_LIST_HEAD(&cdma->sync_queue);
+
+	cdma->event = CDMA_EVENT_NONE;
+	cdma->running = false;
+	cdma->torndown = false;
+
+	err = host1x_pushbuffer_init(&cdma->push_buffer);
+	if (err)
+		return err;
+	return 0;
+}
+
+/*
+ * Destroy a cdma
+ */
+int host1x_cdma_deinit(struct host1x_cdma *cdma)
+{
+	struct push_buffer *pb = &cdma->push_buffer;
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	if (cdma->running) {
+		pr_warn("%s: CDMA still running\n", __func__);
+		return -EBUSY;
+	}
+
+	host1x_pushbuffer_destroy(pb);
+	host1x_hw_cdma_timeout_destroy(host1x, cdma);
+
+	return 0;
+}
+
+/*
+ * Begin a cdma submit
+ */
+int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+
+	mutex_lock(&cdma->lock);
+
+	if (job->timeout) {
+		/* init state on first submit with timeout value */
+		if (!cdma->timeout.initialized) {
+			int err;
+			err = host1x_hw_cdma_timeout_init(host1x, cdma,
+							  job->syncpt_id);
+			if (err) {
+				mutex_unlock(&cdma->lock);
+				return err;
+			}
+		}
+	}
+	if (!cdma->running)
+		host1x_hw_cdma_start(host1x, cdma);
+
+	cdma->slots_free = 0;
+	cdma->slots_used = 0;
+	cdma->first_get = cdma->push_buffer.pos;
+
+	trace_host1x_cdma_begin(dev_name(job->channel->dev));
+	return 0;
+}
+
+/*
+ * Push two words into a push buffer slot
+ * Blocks as necessary if the push buffer is full.
+ */
+void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+	u32 slots_free = cdma->slots_free;
+
+	if (host1x_debug_trace_cmdbuf)
+		trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
+				       op1, op2);
+
+	if (slots_free == 0) {
+		host1x_hw_cdma_flush(host1x, cdma);
+		slots_free = host1x_cdma_wait_locked(cdma,
+						CDMA_EVENT_PUSH_BUFFER_SPACE);
+	}
+	cdma->slots_free = slots_free - 1;
+	cdma->slots_used++;
+	host1x_pushbuffer_push(pb, op1, op2);
+}
+
+/*
+ * End a cdma submit
+ * Kick off DMA, add job to the sync queue, and a number of slots to be freed
+ * from the pushbuffer. The handles for a submit must all be pinned at the same
+ * time, but they can be unpinned in smaller chunks.
+ */
+void host1x_cdma_end(struct host1x_cdma *cdma,
+		     struct host1x_job *job)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	bool idle = list_empty(&cdma->sync_queue);
+
+	host1x_hw_cdma_flush(host1x, cdma);
+
+	job->first_get = cdma->first_get;
+	job->num_slots = cdma->slots_used;
+	host1x_job_get(job);
+	list_add_tail(&job->list, &cdma->sync_queue);
+
+	/* start timer on idle -> active transitions */
+	if (job->timeout && idle)
+		cdma_start_timer_locked(cdma, job);
+
+	trace_host1x_cdma_end(dev_name(job->channel->dev));
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Update cdma state according to current sync point values
+ */
+void host1x_cdma_update(struct host1x_cdma *cdma)
+{
+	mutex_lock(&cdma->lock);
+	update_cdma_locked(cdma);
+	mutex_unlock(&cdma->lock);
+}
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
new file mode 100644
index 000000000000..313c4b784348
--- /dev/null
+++ b/drivers/gpu/host1x/cdma.h
@@ -0,0 +1,100 @@
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_CDMA_H
+#define __HOST1X_CDMA_H
+
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/list.h>
+
+struct host1x_syncpt;
+struct host1x_userctx_timeout;
+struct host1x_job;
+
+/*
+ * cdma
+ *
+ * This is in charge of a host command DMA channel.
+ * Sends ops to a push buffer, and takes responsibility for unpinning
+ * (& possibly freeing) of memory after those ops have completed.
+ * Producer:
+ *	begin
+ *		push - send ops to the push buffer
+ *	end - start command DMA and enqueue handles to be unpinned
+ * Consumer:
+ *	update - call to update sync queue and push buffer, unpin memory
+ */
+
+struct push_buffer {
+	u32 *mapped;			/* mapped pushbuffer memory */
+	dma_addr_t phys;		/* physical address of pushbuffer */
+	u32 fence;			/* index we've written */
+	u32 pos;			/* index to write to */
+	u32 size_bytes;
+};
+
+struct buffer_timeout {
+	struct delayed_work wq;		/* work queue */
+	bool initialized;		/* timer one-time setup flag */
+	struct host1x_syncpt *syncpt;	/* buffer completion syncpt */
+	u32 syncpt_val;			/* syncpt value when completed */
+	ktime_t start_ktime;		/* starting time */
+	/* context timeout information */
+	int client;
+};
+
+enum cdma_event {
+	CDMA_EVENT_NONE,		/* not waiting for any event */
+	CDMA_EVENT_SYNC_QUEUE_EMPTY,	/* wait for empty sync queue */
+	CDMA_EVENT_PUSH_BUFFER_SPACE	/* wait for space in push buffer */
+};
+
+struct host1x_cdma {
+	struct mutex lock;		/* controls access to shared state */
+	struct semaphore sem;		/* signalled when event occurs */
+	enum cdma_event event;		/* event that sem is waiting for */
+	unsigned int slots_used;	/* pb slots used in current submit */
+	unsigned int slots_free;	/* pb slots free in current submit */
+	unsigned int first_get;		/* DMAGET value, where submit begins */
+	unsigned int last_pos;		/* last value written to DMAPUT */
+	struct push_buffer push_buffer;	/* channel's push buffer */
+	struct list_head sync_queue;	/* job queue */
+	struct buffer_timeout timeout;	/* channel's timeout state/wq */
+	bool running;
+	bool torndown;
+};
+
+#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
+#define cdma_to_host1x(cdma) dev_get_drvdata(cdma_to_channel(cdma)->dev->parent)
+#define pb_to_cdma(pb) container_of(pb, struct host1x_cdma, push_buffer)
+
+int host1x_cdma_init(struct host1x_cdma *cdma);
+int host1x_cdma_deinit(struct host1x_cdma *cdma);
+void host1x_cdma_stop(struct host1x_cdma *cdma);
+int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
+void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
+void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
+void host1x_cdma_update(struct host1x_cdma *cdma);
+void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot,
+		      u32 *out);
+unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
+				     enum cdma_event event);
+void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
+				   struct device *dev);
+#endif
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
new file mode 100644
index 000000000000..83ea51b9f0fc
--- /dev/null
+++ b/drivers/gpu/host1x/channel.c
@@ -0,0 +1,126 @@
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include "channel.h"
+#include "dev.h"
+#include "job.h"
+
+/* Constructor for the host1x channel list */
+int host1x_channel_list_init(struct host1x *host)
+{
+	INIT_LIST_HEAD(&host->chlist.list);
+	mutex_init(&host->chlist_mutex);
+	/* channels are tracked as bits of the unsigned long allocated_channels */
+	if (host->info->nb_channels > BITS_PER_LONG) {
+		WARN(1, "host1x hardware has more channels than supported by the driver\n");
+		return -ENOSYS;
+	}
+
+	return 0;
+}
+
+int host1x_job_submit(struct host1x_job *job)
+{
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
+	/* forwards to the channel_op->submit hook of this host1x instance */
+	return host1x_hw_channel_submit(host, job);
+}
+
+struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
+{
+	struct host1x_channel *ret = channel;
+
+	mutex_lock(&channel->reflock);
+
+	/* the first reference initializes the cdma; no ref is taken if it fails */
+	if (!channel->refcount && host1x_cdma_init(&channel->cdma))
+		ret = NULL;
+	else
+		channel->refcount++;
+
+	mutex_unlock(&channel->reflock);
+
+	return ret;
+}
+
+void host1x_channel_put(struct host1x_channel *channel)
+{
+	mutex_lock(&channel->reflock);
+	/* dropping the last reference stops and deinitializes the cdma */
+	if (channel->refcount == 1) {
+		struct host1x *host = dev_get_drvdata(channel->dev->parent);
+
+		host1x_hw_cdma_stop(host, &channel->cdma);
+		host1x_cdma_deinit(&channel->cdma);
+	}
+	/* NOTE(review): unsigned refcount wraps if already 0 - confirm callers */
+	channel->refcount--;
+
+	mutex_unlock(&channel->reflock);
+}
+
+struct host1x_channel *host1x_channel_request(struct device *dev)
+{
+	struct host1x *host = dev_get_drvdata(dev->parent);
+	int max_channels = host->info->nb_channels;
+	struct host1x_channel *channel = NULL;
+	int index, err;
+
+	mutex_lock(&host->chlist_mutex);
+	/* the first clear bit in allocated_channels is the first free id */
+	index = find_first_zero_bit(&host->allocated_channels, max_channels);
+	if (index >= max_channels)
+		goto fail;
+
+	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+	if (!channel)
+		goto fail;
+
+	err = host1x_hw_channel_init(host, channel, index);
+	if (err < 0)
+		goto fail;
+
+	/* Link device to host1x_channel */
+	channel->dev = dev;
+
+	/* Add to channel list */
+	list_add_tail(&channel->list, &host->chlist.list);
+	/* mark the id as taken only after the channel has been set up */
+	host->allocated_channels |= BIT(index);
+
+	mutex_unlock(&host->chlist_mutex);
+	return channel;
+	/* every failure ends here and returns NULL; channel may still be NULL */
+fail:
+	dev_err(dev, "failed to init channel\n");
+	kfree(channel);
+	mutex_unlock(&host->chlist_mutex);
+	return NULL;
+}
+
+void host1x_channel_free(struct host1x_channel *channel)
+{
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	/* NOTE(review): chlist_mutex is not taken here, unlike in request */
+	host->allocated_channels &= ~BIT(channel->id);
+	list_del(&channel->list);
+	kfree(channel);
+}
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
new file mode 100644
index 000000000000..48723b8eea42
--- /dev/null
+++ b/drivers/gpu/host1x/channel.h
@@ -0,0 +1,52 @@
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_CHANNEL_H
+#define __HOST1X_CHANNEL_H
+
+#include <linux/io.h>
+
+#include "cdma.h"
+
+struct host1x;
+
+struct host1x_channel {
+	struct list_head list;		/* entry in the host1x chlist */
+
+	unsigned int refcount;		/* changed under reflock */
+	unsigned int id;		/* bit cleared in allocated_channels */
+	struct mutex reflock;		/* held around refcount updates */
+	struct mutex submitlock;
+	void __iomem *regs;		/* base for host1x_ch_readl/writel */
+	struct device *dev;		/* device passed to channel_request */
+	struct host1x_cdma cdma;	/* set up on first channel_get */
+};
+
+/* channel list operations */
+int host1x_channel_list_init(struct host1x *host);
+
+struct host1x_channel *host1x_channel_request(struct device *dev);
+void host1x_channel_free(struct host1x_channel *channel);
+struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
+void host1x_channel_put(struct host1x_channel *channel);
+int host1x_job_submit(struct host1x_job *job);
+
+#define host1x_for_each_channel(host, channel)				\
+	list_for_each_entry(channel, &host->chlist.list, list)
+
+#endif
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
new file mode 100644
index 000000000000..3ec7d77de24d
--- /dev/null
+++ b/drivers/gpu/host1x/debug.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include <linux/io.h>
+
+#include "dev.h"
+#include "debug.h"
+#include "channel.h"
+
+unsigned int host1x_debug_trace_cmdbuf;
+
+static u32 host1x_debug_force_timeout_pid;	/* u32: debugfs_create_u32 */
+static u32 host1x_debug_force_timeout_val;
+static u32 host1x_debug_force_timeout_channel;
+
+void host1x_debug_output(struct output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;	/* chars really stored in o->buf, so fn never overreads */
+
+	va_start(args, fmt);
+	len = vscnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+	o->fn(o->ctx, o->buf, len);
+}
+
+static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
+{
+	struct host1x *m = dev_get_drvdata(ch->dev->parent);
+	struct output *o = data;
+
+	mutex_lock(&ch->reflock);
+	if (ch->refcount) {		/* skip channels with no references */
+		mutex_lock(&ch->cdma.lock);
+		if (show_fifo)
+			host1x_hw_show_channel_fifo(m, ch, o);
+		host1x_hw_show_channel_cdma(m, ch, o);
+		mutex_unlock(&ch->cdma.lock);
+	}
+	mutex_unlock(&ch->reflock);
+	/* always reports success */
+	return 0;
+}
+
+static void show_syncpts(struct host1x *m, struct output *o)
+{
+	int i;
+	host1x_debug_output(o, "---- syncpts ----\n");
+	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
+		u32 max = host1x_syncpt_read_max(m->syncpt + i);
+		u32 min = host1x_syncpt_load(m->syncpt + i);
+		if (!min && !max)	/* nothing to report for this one */
+			continue;
+		host1x_debug_output(o, "id %d (%s) min %u max %u\n",
+				    i, m->syncpt[i].name, min, max);
+	}
+	/* values are u32: %u keeps those above INT_MAX from printing negative */
+	for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
+		u32 base_val;
+		base_val = host1x_syncpt_load_wait_base(m->syncpt + i);
+		if (base_val)
+			host1x_debug_output(o, "waitbase id %d val %u\n", i,
+					    base_val);
+	}
+
+	host1x_debug_output(o, "\n");
+}
+
+static void show_all(struct host1x *m, struct output *o)
+{
+	struct host1x_channel *ch;
+
+	host1x_hw_show_mlocks(m, o);
+	show_syncpts(m, o);
+	host1x_debug_output(o, "---- channels ----\n");
+	/* NOTE(review): walks chlist without chlist_mutex - confirm it is safe */
+	host1x_for_each_channel(m, ch)
+		show_channels(ch, o, true);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void show_all_no_fifo(struct host1x *host1x, struct output *o)
+{
+	struct host1x_channel *ch;
+
+	host1x_hw_show_mlocks(host1x, o);
+	show_syncpts(host1x, o);
+	host1x_debug_output(o, "---- channels ----\n");
+	/* same dump as show_all(), but show_channels() skips the FIFO part */
+	host1x_for_each_channel(host1x, ch)
+		show_channels(ch, o, false);
+}
+
+static int host1x_debug_show_all(struct seq_file *s, void *unused)
+{
+	struct output o = {
+		.fn = write_to_seqfile,
+		.ctx = s
+	};
+	show_all(s->private, &o);
+	return 0;
+}
+
+static int host1x_debug_show(struct seq_file *s, void *unused)
+{
+	struct output o = {
+		.fn = write_to_seqfile,
+		.ctx = s
+	};
+	show_all_no_fifo(s->private, &o);
+	return 0;
+}
+
+static int host1x_debug_open_all(struct inode *inode, struct file *file)
+{
+	return single_open(file, host1x_debug_show_all, inode->i_private);
+}
+
+static const struct file_operations host1x_debug_all_fops = {
+	.open		= host1x_debug_open_all,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int host1x_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, host1x_debug_show, inode->i_private);
+}
+
+static const struct file_operations host1x_debug_fops = {
+	.open		= host1x_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void host1x_debug_init(struct host1x *host1x)
+{
+	struct dentry *de = debugfs_create_dir("tegra-host1x", NULL);
+
+	if (!de)
+		return;
+
+	/* Store the created entry so host1x_debug_deinit() can remove it */
+	host1x->debugfs = de;
+	/* "status" omits the channel FIFO dump, "status_all" includes it */
+	debugfs_create_file("status", S_IRUGO, de, host1x, &host1x_debug_fops);
+	debugfs_create_file("status_all", S_IRUGO, de, host1x,
+			    &host1x_debug_all_fops);
+	/* runtime switch checked by host1x_cdma_push() before tracing */
+	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_trace_cmdbuf);
+	/* optional per-SoC hook, see host1x_hw_debug_init() */
+	host1x_hw_debug_init(host1x, de);
+	/* NOTE(review): force_timeout_* are not read in the visible code */
+	debugfs_create_u32("force_timeout_pid", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_pid);
+	debugfs_create_u32("force_timeout_val", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_val);
+	debugfs_create_u32("force_timeout_channel", S_IRUGO|S_IWUSR, de,
+			   &host1x_debug_force_timeout_channel);
+}
+
+void host1x_debug_deinit(struct host1x *host1x)
+{
+	debugfs_remove_recursive(host1x->debugfs);
+}
+#else
+void host1x_debug_init(struct host1x *host1x)
+{
+}
+void host1x_debug_deinit(struct host1x *host1x)
+{
+}
+#endif
+
+void host1x_debug_dump(struct host1x *host1x)
+{
+	struct output o = {
+		.fn = write_to_printk
+	};
+	show_all(host1x, &o);
+}
+
+void host1x_debug_dump_syncpts(struct host1x *host1x)
+{
+	struct output o = {
+		.fn = write_to_printk
+	};
+	show_syncpts(host1x, &o);
+}
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
new file mode 100644
index 000000000000..4595b2e0799f
--- /dev/null
+++ b/drivers/gpu/host1x/debug.h
@@ -0,0 +1,51 @@
+/*
+ * Tegra host1x Debug
+ *
+ * Copyright (c) 2011-2013 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __HOST1X_DEBUG_H
+#define __HOST1X_DEBUG_H
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+struct host1x;
+
+struct output {
+	void (*fn)(void *ctx, const char *str, size_t len);
+	void *ctx;		/* handed back to fn as its first argument */
+	char buf[256];		/* formatting buffer of host1x_debug_output() */
+};
+
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+{
+	seq_write((struct seq_file *)ctx, str, len);
+}
+
+static inline void write_to_printk(void *ctx, const char *str, size_t len)
+{
+	pr_info("%s", str);	/* ctx and len unused; str must be NUL-terminated */
+}
+
+void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+
+extern unsigned int host1x_debug_trace_cmdbuf;
+
+void host1x_debug_init(struct host1x *host1x);
+void host1x_debug_deinit(struct host1x *host1x);
+void host1x_debug_dump(struct host1x *host1x);
+void host1x_debug_dump_syncpts(struct host1x *host1x);
+
+#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
new file mode 100644
index 000000000000..28e28a23d444
--- /dev/null
+++ b/drivers/gpu/host1x/dev.c
@@ -0,0 +1,246 @@
+/*
+ * Tegra host1x driver
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/host1x.h>
+
+#include "dev.h"
+#include "intr.h"
+#include "channel.h"
+#include "debug.h"
+#include "hw/host1x01.h"
+#include "host1x_client.h"
+
+void host1x_set_drm_data(struct device *dev, void *data)
+{
+	struct host1x *host1x = dev_get_drvdata(dev);
+	host1x->drm_data = data;
+}
+
+void *host1x_get_drm_data(struct device *dev)
+{
+	struct host1x *host1x = dev_get_drvdata(dev);
+	return host1x->drm_data;
+}
+
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+	writel(v, sync_regs + r);
+}
+
+u32 host1x_sync_readl(struct host1x *host1x, u32 r)
+{
+	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+	return readl(sync_regs + r);
+}
+
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
+{
+	writel(v, ch->regs + r);
+}
+
+u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
+{
+	return readl(ch->regs + r);
+}
+
+static const struct host1x_info host1x01_info = {
+	.nb_channels	= 8,
+	.nb_pts		= 32,
+	.nb_mlocks	= 16,
+	.nb_bases	= 8,
+	.init		= host1x01_init,
+	.sync_offset	= 0x3000,
+};
+
+static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
+	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
+	{ },	/* sentinel */
+};
+MODULE_DEVICE_TABLE(of, host1x_of_match);
+
+static int host1x_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *id;
+	struct host1x *host;
+	struct resource *regs;
+	int syncpt_irq;
+	int err;
+
+	id = of_match_device(host1x_of_match, &pdev->dev);
+	if (!id)
+		return -EINVAL;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "failed to get registers\n");
+		return -ENXIO;
+	}
+
+	syncpt_irq = platform_get_irq(pdev, 0);
+	if (syncpt_irq < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ\n");
+		return -ENXIO;
+	}
+
+	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->dev = &pdev->dev;
+	host->info = id->data;
+
+	/* set common host1x device data */
+	platform_set_drvdata(pdev, host);
+
+	host->regs = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(host->regs))
+		return PTR_ERR(host->regs);
+
+	if (host->info->init) {
+		err = host->info->init(host);
+		if (err)
+			return err;
+	}
+
+	host->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(host->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(host->clk);
+	}
+
+	err = host1x_channel_list_init(host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize channel list\n");
+		return err;
+	}
+
+	err = clk_prepare_enable(host->clk);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to enable clock\n");
+		return err;
+	}
+	/* from here on, every failure must undo clk_prepare_enable() */
+	err = host1x_syncpt_init(host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize syncpts\n");
+		goto fail_disable_clk;
+	}
+
+	err = host1x_intr_init(host, syncpt_irq);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize interrupts\n");
+		goto fail_deinit_syncpt;
+	}
+
+	host1x_debug_init(host);
+	host1x_drm_alloc(pdev);
+
+	return 0;
+
+fail_deinit_syncpt:
+	host1x_syncpt_deinit(host);
+fail_disable_clk:
+	clk_disable_unprepare(host->clk);
+	return err;
+}
+
+static int __exit host1x_remove(struct platform_device *pdev)
+{
+	struct host1x *host = platform_get_drvdata(pdev);
+
+	host1x_debug_deinit(host);	/* debugfs files point at host */
+	host1x_intr_deinit(host);
+	host1x_syncpt_deinit(host);
+	clk_disable_unprepare(host->clk);
+	return 0;
+}
+
+static struct platform_driver tegra_host1x_driver = {
+	.probe = host1x_probe,
+	.remove = __exit_p(host1x_remove),
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "tegra-host1x",
+		.of_match_table = host1x_of_match,
+	},
+};
+
+static int __init tegra_host1x_init(void)
+{
+	int err;
+
+	err = platform_driver_register(&tegra_host1x_driver);
+	if (err < 0)
+		return err;
+
+#ifdef CONFIG_DRM_TEGRA
+	err = platform_driver_register(&tegra_dc_driver);
+	if (err < 0)
+		goto unregister_host1x;
+
+	err = platform_driver_register(&tegra_hdmi_driver);
+	if (err < 0)
+		goto unregister_dc;
+
+	err = platform_driver_register(&tegra_gr2d_driver);
+	if (err < 0)
+		goto unregister_hdmi;
+#endif
+
+	return 0;
+
+#ifdef CONFIG_DRM_TEGRA
+unregister_hdmi:
+	platform_driver_unregister(&tegra_hdmi_driver);
+unregister_dc:
+	platform_driver_unregister(&tegra_dc_driver);
+unregister_host1x:
+	platform_driver_unregister(&tegra_host1x_driver);
+	return err;
+#endif
+}
+module_init(tegra_host1x_init);
+
+static void __exit tegra_host1x_exit(void)
+{
+#ifdef CONFIG_DRM_TEGRA
+	platform_driver_unregister(&tegra_gr2d_driver);
+	platform_driver_unregister(&tegra_hdmi_driver);
+	platform_driver_unregister(&tegra_dc_driver);
+#endif
+	platform_driver_unregister(&tegra_host1x_driver);
+}
+module_exit(tegra_host1x_exit);
+
+MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
+MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
+MODULE_DESCRIPTION("Host1x driver for Tegra products");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
new file mode 100644
index 000000000000..a1607d6e135b
--- /dev/null
+++ b/drivers/gpu/host1x/dev.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_DEV_H
+#define HOST1X_DEV_H
+
+#include <linux/platform_device.h>
+#include <linux/device.h>
+
+#include "channel.h"
+#include "syncpt.h"
+#include "intr.h"
+#include "cdma.h"
+#include "job.h"
+
+struct host1x_syncpt;
+struct host1x_channel;
+struct host1x_cdma;
+struct host1x_job;
+struct push_buffer;
+struct output;
+struct dentry;
+
+struct host1x_channel_ops {
+	int (*init)(struct host1x_channel *channel, struct host1x *host,
+		    unsigned int id);
+	int (*submit)(struct host1x_job *job);
+};
+
+struct host1x_cdma_ops {
+	void (*start)(struct host1x_cdma *cdma);
+	void (*stop)(struct host1x_cdma *cdma);
+	void (*flush)(struct  host1x_cdma *cdma);
+	int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id);
+	void (*timeout_destroy)(struct host1x_cdma *cdma);
+	void (*freeze)(struct host1x_cdma *cdma);
+	void (*resume)(struct host1x_cdma *cdma, u32 getptr);
+	void (*timeout_cpu_incr)(struct host1x_cdma *cdma, u32 getptr,
+				 u32 syncpt_incrs, u32 syncval, u32 nr_slots);
+};
+
+struct host1x_pushbuffer_ops {
+	void (*init)(struct push_buffer *pb);
+};
+
+struct host1x_debug_ops {
+	void (*debug_init)(struct dentry *de);
+	void (*show_channel_cdma)(struct host1x *host,
+				  struct host1x_channel *ch,
+				  struct output *o);
+	void (*show_channel_fifo)(struct host1x *host,
+				  struct host1x_channel *ch,
+				  struct output *o);
+	void (*show_mlocks)(struct host1x *host, struct output *output);
+
+};
+
+struct host1x_syncpt_ops {
+	void (*restore)(struct host1x_syncpt *syncpt);
+	void (*restore_wait_base)(struct host1x_syncpt *syncpt);
+	void (*load_wait_base)(struct host1x_syncpt *syncpt);
+	u32 (*load)(struct host1x_syncpt *syncpt);
+	void (*cpu_incr)(struct host1x_syncpt *syncpt);
+	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+};
+
+struct host1x_intr_ops {
+	int (*init_host_sync)(struct host1x *host, u32 cpm,
+		void (*syncpt_thresh_work)(struct work_struct *work));
+	void (*set_syncpt_threshold)(
+		struct host1x *host, u32 id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x *host, u32 id);
+	void (*disable_syncpt_intr)(struct host1x *host, u32 id);
+	void (*disable_all_syncpt_intrs)(struct host1x *host);
+	int (*free_syncpt_irq)(struct host1x *host);
+};
+
+struct host1x_info {
+	int	nb_channels;		/* host1x: num channels supported */
+	int	nb_pts;			/* host1x: num syncpoints supported */
+	int	nb_bases;		/* host1x: num wait bases supported */
+	int	nb_mlocks;		/* host1x: number of mlocks */
+	int	(*init)(struct host1x *); /* initialize per SoC ops */
+	int	sync_offset;		/* offset of sync regs within regs */
+};
+
+struct host1x {
+	const struct host1x_info *info;		/* from the OF match data */
+
+	void __iomem *regs;
+	struct host1x_syncpt *syncpt;		/* array, indexed by syncpt id */
+	struct device *dev;
+	struct clk *clk;
+
+	struct mutex intr_mutex;
+	struct workqueue_struct *intr_wq;
+	int intr_syncpt_irq;
+
+	const struct host1x_syncpt_ops *syncpt_op;
+	const struct host1x_intr_ops *intr_op;
+	const struct host1x_channel_ops *channel_op;
+	const struct host1x_cdma_ops *cdma_op;
+	const struct host1x_pushbuffer_ops *cdma_pb_op;
+	const struct host1x_debug_ops *debug_op;
+
+	struct host1x_syncpt *nop_sp;
+
+	struct mutex chlist_mutex;		/* held by channel_request */
+	struct host1x_channel chlist;		/* list head, via chlist.list */
+	unsigned long allocated_channels;	/* bitmap of channel ids in use */
+	unsigned int num_allocated_channels;
+
+	struct dentry *debugfs;			/* "tegra-host1x" debugfs dir */
+
+	void *drm_data;				/* host1x_{set,get}_drm_data() */
+};
+
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r); /* value, reg */
+u32 host1x_sync_readl(struct host1x *host1x, u32 r);
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); /* value, reg */
+u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
+
+static inline void host1x_hw_syncpt_restore(struct host1x *host,
+					    struct host1x_syncpt *sp)
+{
+	host->syncpt_op->restore(sp);
+}
+
+static inline void host1x_hw_syncpt_restore_wait_base(struct host1x *host,
+						      struct host1x_syncpt *sp)
+{
+	host->syncpt_op->restore_wait_base(sp);
+}
+
+static inline void host1x_hw_syncpt_load_wait_base(struct host1x *host,
+						   struct host1x_syncpt *sp)
+{
+	host->syncpt_op->load_wait_base(sp);
+}
+
+static inline u32 host1x_hw_syncpt_load(struct host1x *host,
+					struct host1x_syncpt *sp)
+{
+	return host->syncpt_op->load(sp);
+}
+
+static inline void host1x_hw_syncpt_cpu_incr(struct host1x *host,
+					     struct host1x_syncpt *sp)
+{
+	host->syncpt_op->cpu_incr(sp);
+}
+
+static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
+					      struct host1x_syncpt *sp,
+					      void *patch_addr)
+{
+	return host->syncpt_op->patch_wait(sp, patch_addr);
+}
+
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
+			void (*syncpt_thresh_work)(struct work_struct *))
+{
+	return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+}
+
+static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
+						       u32 id, u32 thresh)
+{
+	host->intr_op->set_syncpt_threshold(host, id, thresh);
+}
+
+static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host,
+						     u32 id)
+{
+	host->intr_op->enable_syncpt_intr(host, id);
+}
+
+static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host,
+						      u32 id)
+{
+	host->intr_op->disable_syncpt_intr(host, id);
+}
+
+static inline void host1x_hw_intr_disable_all_syncpt_intrs(struct host1x *host)
+{
+	host->intr_op->disable_all_syncpt_intrs(host);
+}
+
+static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host)
+{
+	return host->intr_op->free_syncpt_irq(host);
+}
+
+static inline int host1x_hw_channel_init(struct host1x *host,
+					 struct host1x_channel *channel,
+					 int chid)
+{
+	return host->channel_op->init(channel, host, chid);
+}
+
+static inline int host1x_hw_channel_submit(struct host1x *host,
+					   struct host1x_job *job)
+{
+	return host->channel_op->submit(job);
+}
+
+static inline void host1x_hw_cdma_start(struct host1x *host,
+					struct host1x_cdma *cdma)
+{
+	host->cdma_op->start(cdma);
+}
+
+static inline void host1x_hw_cdma_stop(struct host1x *host,
+				       struct host1x_cdma *cdma)
+{
+	host->cdma_op->stop(cdma);
+}
+
+static inline void host1x_hw_cdma_flush(struct host1x *host,
+					struct host1x_cdma *cdma)
+{
+	host->cdma_op->flush(cdma);
+}
+
+static inline int host1x_hw_cdma_timeout_init(struct host1x *host,
+					      struct host1x_cdma *cdma,
+					      u32 syncpt_id)
+{
+	return host->cdma_op->timeout_init(cdma, syncpt_id);
+}
+
+static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host,
+						  struct host1x_cdma *cdma)
+{
+	host->cdma_op->timeout_destroy(cdma);
+}
+
+static inline void host1x_hw_cdma_freeze(struct host1x *host,
+					 struct host1x_cdma *cdma)
+{
+	host->cdma_op->freeze(cdma);
+}
+
+static inline void host1x_hw_cdma_resume(struct host1x *host,
+					 struct host1x_cdma *cdma, u32 getptr)
+{
+	host->cdma_op->resume(cdma, getptr);
+}
+
+static inline void host1x_hw_cdma_timeout_cpu_incr(struct host1x *host,
+						   struct host1x_cdma *cdma,
+						   u32 getptr,
+						   u32 syncpt_incrs,
+						   u32 syncval, u32 nr_slots)
+{
+	host->cdma_op->timeout_cpu_incr(cdma, getptr, syncpt_incrs, syncval,
+					nr_slots);
+}
+
+static inline void host1x_hw_pushbuffer_init(struct host1x *host,
+					     struct push_buffer *pb)
+{
+	host->cdma_pb_op->init(pb);
+}
+
+static inline void host1x_hw_debug_init(struct host1x *host, struct dentry *de)
+{
+	if (host->debug_op && host->debug_op->debug_init)
+		host->debug_op->debug_init(de);
+}
+
+static inline void host1x_hw_show_channel_cdma(struct host1x *host,
+					       struct host1x_channel *channel,
+					       struct output *o)
+{
+	host->debug_op->show_channel_cdma(host, channel, o);
+}
+
+static inline void host1x_hw_show_channel_fifo(struct host1x *host,
+					       struct host1x_channel *channel,
+					       struct output *o)
+{
+	host->debug_op->show_channel_fifo(host, channel, o);
+}
+
+static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o)
+{
+	host->debug_op->show_mlocks(host, o);
+}
+
+extern struct platform_driver tegra_hdmi_driver;
+extern struct platform_driver tegra_dc_driver;
+extern struct platform_driver tegra_gr2d_driver;
+
+#endif
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/host1x/drm/Kconfig
index be1daf7344d3..69853a4de40a 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/host1x/drm/Kconfig
@@ -1,12 +1,10 @@
 config DRM_TEGRA
-	tristate "NVIDIA Tegra DRM"
-	depends on DRM && OF && ARCH_TEGRA
+	bool "NVIDIA Tegra DRM"
+	depends on DRM
 	select DRM_KMS_HELPER
-	select DRM_GEM_CMA_HELPER
-	select DRM_KMS_CMA_HELPER
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
+	select FB_SYS_FILLRECT
+	select FB_SYS_COPYAREA
+	select FB_SYS_IMAGEBLIT
 	help
 	  Choose this option if you have an NVIDIA Tegra SoC.
 
@@ -15,6 +13,14 @@ config DRM_TEGRA
 
 if DRM_TEGRA
 
+config DRM_TEGRA_STAGING
+	bool "Enable HOST1X interface"
+	depends on STAGING
+	help
+	  Say yes if HOST1X should be available for userspace DRM users.
+
+	  If unsure, choose N.
+
 config DRM_TEGRA_DEBUG
 	bool "NVIDIA Tegra DRM debug support"
 	help
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/host1x/drm/dc.c
index de94707b9dbe..1e2060324f02 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/host1x/drm/dc.c
@@ -14,8 +14,10 @@
 #include <linux/platform_device.h>
 #include <linux/clk/tegra.h>
 
-#include "drm.h"
+#include "host1x_client.h"
 #include "dc.h"
+#include "drm.h"
+#include "gem.h"
 
 struct tegra_plane {
 	struct drm_plane base;
@@ -51,9 +53,9 @@ static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
 	window.bits_per_pixel = fb->bits_per_pixel;
 
 	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
-		struct drm_gem_cma_object *gem = drm_fb_cma_get_gem_obj(fb, i);
+		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
 
-		window.base[i] = gem->paddr + fb->offsets[i];
+		window.base[i] = bo->paddr + fb->offsets[i];
 
 		/*
 		 * Tegra doesn't support different strides for U and V planes
@@ -103,7 +105,9 @@ static const struct drm_plane_funcs tegra_plane_funcs = {
 };
 
 static const uint32_t plane_formats[] = {
+	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGB565,
 	DRM_FORMAT_UYVY,
 	DRM_FORMAT_YUV420,
 	DRM_FORMAT_YUV422,
@@ -136,7 +140,7 @@ static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
 static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y,
 			     struct drm_framebuffer *fb)
 {
-	struct drm_gem_cma_object *gem = drm_fb_cma_get_gem_obj(fb, 0);
+	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
 	unsigned long value;
 
 	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
@@ -144,7 +148,7 @@ static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y,
 	value = fb->offsets[0] + y * fb->pitches[0] +
 		x * fb->bits_per_pixel / 8;
 
-	tegra_dc_writel(dc, gem->paddr + value, DC_WINBUF_START_ADDR);
+	tegra_dc_writel(dc, bo->paddr + value, DC_WINBUF_START_ADDR);
 	tegra_dc_writel(dc, fb->pitches[0], DC_WIN_LINE_STRIDE);
 
 	value = GENERAL_UPDATE | WIN_A_UPDATE;
@@ -186,20 +190,20 @@ static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
 {
 	struct drm_device *drm = dc->base.dev;
 	struct drm_crtc *crtc = &dc->base;
-	struct drm_gem_cma_object *gem;
 	unsigned long flags, base;
+	struct tegra_bo *bo;
 
 	if (!dc->event)
 		return;
 
-	gem = drm_fb_cma_get_gem_obj(crtc->fb, 0);
+	bo = tegra_fb_get_plane(crtc->fb, 0);
 
 	/* check if new start address has been latched */
 	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
 	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
 	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
 
-	if (base == gem->paddr + crtc->fb->offsets[0]) {
+	if (base == bo->paddr + crtc->fb->offsets[0]) {
 		spin_lock_irqsave(&drm->event_lock, flags);
 		drm_send_vblank_event(drm, dc->pipe, dc->event);
 		drm_vblank_put(drm, dc->pipe);
@@ -541,6 +545,9 @@ int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
 unsigned int tegra_dc_format(uint32_t format)
 {
 	switch (format) {
+	case DRM_FORMAT_XBGR8888:
+		return WIN_COLOR_DEPTH_R8G8B8A8;
+
 	case DRM_FORMAT_XRGB8888:
 		return WIN_COLOR_DEPTH_B8G8R8A8;
 
@@ -569,7 +576,7 @@ static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 			       struct drm_display_mode *adjusted,
 			       int x, int y, struct drm_framebuffer *old_fb)
 {
-	struct drm_gem_cma_object *gem = drm_fb_cma_get_gem_obj(crtc->fb, 0);
+	struct tegra_bo *bo = tegra_fb_get_plane(crtc->fb, 0);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	struct tegra_dc_window window;
 	unsigned long div, value;
@@ -616,7 +623,7 @@ static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 	window.format = tegra_dc_format(crtc->fb->pixel_format);
 	window.bits_per_pixel = crtc->fb->bits_per_pixel;
 	window.stride[0] = crtc->fb->pitches[0];
-	window.base[0] = gem->paddr;
+	window.base[0] = bo->paddr;
 
 	err = tegra_dc_setup_window(dc, 0, &window);
 	if (err < 0)
@@ -1097,7 +1104,7 @@ static const struct host1x_client_ops dc_client_ops = {
 
 static int tegra_dc_probe(struct platform_device *pdev)
 {
-	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
 	struct resource *regs;
 	struct tegra_dc *dc;
 	int err;
@@ -1160,7 +1167,7 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
 static int tegra_dc_remove(struct platform_device *pdev)
 {
-	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_dc *dc = platform_get_drvdata(pdev);
 	int err;
 
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/host1x/drm/dc.h
index 79eaec9aac77..79eaec9aac77 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/host1x/drm/dc.h
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
new file mode 100644
index 000000000000..2b561c9118c6
--- /dev/null
+++ b/drivers/gpu/host1x/drm/drm.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <linux/dma-mapping.h>
+#include <asm/dma-iommu.h>
+
+#include <drm/drm.h>
+#include <drm/drmP.h>
+
+#include "host1x_client.h"
+#include "dev.h"
+#include "drm.h"
+#include "gem.h"
+#include "syncpt.h"
+
+#define DRIVER_NAME "tegra"
+#define DRIVER_DESC "NVIDIA Tegra graphics"
+#define DRIVER_DATE "20120330"
+#define DRIVER_MAJOR 0
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+struct host1x_drm_client {
+	struct host1x_client *client;
+	struct device_node *np;
+	struct list_head list;
+};
+
+static int host1x_add_drm_client(struct host1x_drm *host1x,
+				 struct device_node *np)
+{
+	struct host1x_drm_client *client;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&client->list);
+	client->np = of_node_get(np);
+
+	list_add_tail(&client->list, &host1x->drm_clients);
+
+	return 0;
+}
+
+static int host1x_activate_drm_client(struct host1x_drm *host1x,
+				      struct host1x_drm_client *drm,
+				      struct host1x_client *client)
+{
+	mutex_lock(&host1x->drm_clients_lock);
+	list_del_init(&drm->list);
+	list_add_tail(&drm->list, &host1x->drm_active);
+	drm->client = client;
+	mutex_unlock(&host1x->drm_clients_lock);
+
+	return 0;
+}
+
+static int host1x_remove_drm_client(struct host1x_drm *host1x,
+				    struct host1x_drm_client *client)
+{
+	mutex_lock(&host1x->drm_clients_lock);
+	list_del_init(&client->list);
+	mutex_unlock(&host1x->drm_clients_lock);
+
+	of_node_put(client->np);
+	kfree(client);
+
+	return 0;
+}
+
+static int host1x_parse_dt(struct host1x_drm *host1x)
+{
+	static const char * const compat[] = {
+		"nvidia,tegra20-dc",
+		"nvidia,tegra20-hdmi",
+		"nvidia,tegra20-gr2d",
+		"nvidia,tegra30-dc",
+		"nvidia,tegra30-hdmi",
+		"nvidia,tegra30-gr2d",
+	};
+	struct device_node *np;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(compat); i++) {
+		for_each_child_of_node(host1x->dev->of_node, np) {
+			if (of_device_is_compatible(np, compat[i]) &&
+			    of_device_is_available(np)) {
+				err = host1x_add_drm_client(host1x, np);
+				if (err < 0) {
+					of_node_put(np); /* iterator's ref */
+					return err;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+int host1x_drm_alloc(struct platform_device *pdev)
+{
+	struct host1x_drm *host1x;
+	int err;
+
+	host1x = devm_kzalloc(&pdev->dev, sizeof(*host1x), GFP_KERNEL);
+	if (!host1x)
+		return -ENOMEM;
+
+	mutex_init(&host1x->drm_clients_lock);
+	INIT_LIST_HEAD(&host1x->drm_clients);
+	INIT_LIST_HEAD(&host1x->drm_active);
+	mutex_init(&host1x->clients_lock);
+	INIT_LIST_HEAD(&host1x->clients);
+	host1x->dev = &pdev->dev;
+
+	err = host1x_parse_dt(host1x);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to parse DT: %d\n", err);
+		return err;
+	}
+
+	host1x_set_drm_data(&pdev->dev, host1x);
+
+	return 0;
+}
+
+int host1x_drm_init(struct host1x_drm *host1x, struct drm_device *drm)
+{
+	struct host1x_client *client;
+
+	/* run every client's drm_init hook; stop at the first failure */
+	mutex_lock(&host1x->clients_lock);
+	list_for_each_entry(client, &host1x->clients, list) {
+		if (client->ops && client->ops->drm_init) {
+			int err = client->ops->drm_init(client, drm);
+			if (err < 0) {
+				dev_err(host1x->dev,
+					"DRM setup failed for %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&host1x->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->clients_lock);
+	return 0;
+}
+
+int host1x_drm_exit(struct host1x_drm *host1x)
+{
+	struct platform_device *pdev = to_platform_device(host1x->dev);
+	struct host1x_client *client;
+
+	if (!host1x->drm)
+		return 0;
+
+	/* walk the client list back to front; unlock on every exit path */
+	mutex_lock(&host1x->clients_lock);
+	list_for_each_entry_reverse(client, &host1x->clients, list) {
+		if (client->ops && client->ops->drm_exit) {
+			int err = client->ops->drm_exit(client);
+			if (err < 0) {
+				dev_err(host1x->dev,
+					"DRM cleanup failed for %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&host1x->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->clients_lock);
+	drm_platform_exit(&tegra_drm_driver, pdev);
+	host1x->drm = NULL;
+
+	return 0;
+}
+
+int host1x_register_client(struct host1x_drm *host1x,
+			   struct host1x_client *client)
+{
+	struct host1x_drm_client *drm, *tmp;
+	int err;
+
+	mutex_lock(&host1x->clients_lock);
+	list_add_tail(&client->list, &host1x->clients);
+	mutex_unlock(&host1x->clients_lock);
+
+	list_for_each_entry_safe(drm, tmp, &host1x->drm_clients, list)
+		if (drm->np == client->dev->of_node)
+			host1x_activate_drm_client(host1x, drm, client);
+
+	if (list_empty(&host1x->drm_clients)) {
+		struct platform_device *pdev = to_platform_device(host1x->dev);
+
+		err = drm_platform_init(&tegra_drm_driver, pdev);
+		if (err < 0) {
+			dev_err(host1x->dev, "drm_platform_init(): %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int host1x_unregister_client(struct host1x_drm *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_drm_client *drm, *tmp;
+	int err;
+
+	list_for_each_entry_safe(drm, tmp, &host1x->drm_active, list) {
+		if (drm->client == client) {
+			err = host1x_drm_exit(host1x);
+			if (err < 0) {
+				dev_err(host1x->dev, "host1x_drm_exit(): %d\n",
+					err);
+				return err;
+			}
+
+			host1x_remove_drm_client(host1x, drm);
+			break;
+		}
+	}
+
+	mutex_lock(&host1x->clients_lock);
+	list_del_init(&client->list);
+	mutex_unlock(&host1x->clients_lock);
+
+	return 0;
+}
+
+static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
+{
+	struct host1x_drm *host1x;
+	int err;
+
+	host1x = host1x_get_drm_data(drm->dev);
+	drm->dev_private = host1x;
+	host1x->drm = drm;
+
+	drm_mode_config_init(drm);
+
+	err = host1x_drm_init(host1x, drm);
+	if (err < 0)
+		return err;
+
+	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
+	if (err < 0)
+		return err;
+
+	err = tegra_drm_fb_init(drm);
+	if (err < 0)
+		return err;
+
+	drm_kms_helper_poll_init(drm);
+
+	return 0;
+}
+
+static int tegra_drm_unload(struct drm_device *drm)
+{
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_exit(drm);
+
+	drm_mode_config_cleanup(drm);
+
+	return 0;
+}
+
+static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
+{
+	struct host1x_drm_file *fpriv;
+
+	fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+	if (!fpriv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fpriv->contexts);
+	filp->driver_priv = fpriv;
+
+	return 0;
+}
+
+static void host1x_drm_context_free(struct host1x_drm_context *context)
+{
+	context->client->ops->close_channel(context);
+	kfree(context);
+}
+
+static void tegra_drm_lastclose(struct drm_device *drm)
+{
+	struct host1x_drm *host1x = drm->dev_private;
+
+	tegra_fbdev_restore_mode(host1x->fbdev);
+}
+
+#ifdef CONFIG_DRM_TEGRA_STAGING
+static bool host1x_drm_file_owns_context(struct host1x_drm_file *file,
+					 struct host1x_drm_context *context)
+{
+	struct host1x_drm_context *entry;
+
+	/* @context is only compared by address here, never dereferenced */
+	list_for_each_entry(entry, &file->contexts, list)
+		if (entry == context)
+			return true;
+	return false;
+}
+
+static int tegra_gem_create(struct drm_device *drm, void *data,
+			    struct drm_file *file)
+{
+	struct drm_tegra_gem_create *args = data;
+	struct tegra_bo *bo;
+
+	bo = tegra_bo_create_with_handle(file, drm, args->size,
+					 &args->handle);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	return 0;
+}
+
+static int tegra_gem_mmap(struct drm_device *drm, void *data,
+			  struct drm_file *file)
+{
+	struct drm_tegra_gem_mmap *args = data;
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+
+	gem = drm_gem_object_lookup(drm, file, args->handle);
+	if (!gem)
+		return -EINVAL;
+
+	bo = to_tegra_bo(gem);
+	args->offset = tegra_bo_get_mmap_offset(bo);
+
+	/* this ioctl is DRM_UNLOCKED, so struct_mutex is not held here */
+	drm_gem_object_unreference_unlocked(gem);
+
+	return 0;
+}
+
+static int tegra_syncpt_read(struct drm_device *drm, void *data,
+			     struct drm_file *file)
+{
+	struct drm_tegra_syncpt_read *args = data;
+	struct host1x *host = dev_get_drvdata(drm->dev);
+	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+
+	if (!sp)
+		return -EINVAL;
+
+	args->value = host1x_syncpt_read_min(sp);
+	return 0;
+}
+
+static int tegra_syncpt_incr(struct drm_device *drm, void *data,
+			     struct drm_file *file)
+{
+	struct drm_tegra_syncpt_incr *args = data;
+	struct host1x *host = dev_get_drvdata(drm->dev);
+	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+
+	if (!sp)
+		return -EINVAL;
+
+	host1x_syncpt_incr(sp);
+	return 0;
+}
+
+static int tegra_syncpt_wait(struct drm_device *drm, void *data,
+			     struct drm_file *file)
+{
+	struct drm_tegra_syncpt_wait *args = data;
+	struct host1x *host = dev_get_drvdata(drm->dev);
+	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
+
+	if (!sp)
+		return -EINVAL;
+
+	return host1x_syncpt_wait(sp, args->thresh, args->timeout,
+				  &args->value);
+}
+
+static int tegra_open_channel(struct drm_device *drm, void *data,
+			      struct drm_file *file)
+{
+	struct drm_tegra_open_channel *args = data;
+	struct host1x_client *client;
+	struct host1x_drm_context *context;
+	struct host1x_drm_file *fpriv = file->driver_priv;
+	struct host1x_drm *host1x = drm->dev_private;
+	int err = -ENODEV;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	/* only clients that implement open_channel can hand out channels */
+	list_for_each_entry(client, &host1x->clients, list)
+		if (client->class == args->client &&
+		    client->ops && client->ops->open_channel) {
+			err = client->ops->open_channel(client, context);
+			if (err)
+				break;
+			context->client = client;
+			list_add(&context->list, &fpriv->contexts);
+			args->context = (uintptr_t)context;
+			return 0;
+		}
+	kfree(context);
+	return err;
+}
+
+static int tegra_close_channel(struct drm_device *drm, void *data,
+			       struct drm_file *file)
+{
+	struct drm_tegra_close_channel *args = data;
+	struct host1x_drm_file *fpriv = file->driver_priv;
+	struct host1x_drm_context *context =
+		(struct host1x_drm_context *)(uintptr_t)args->context;
+
+	if (!host1x_drm_file_owns_context(fpriv, context))
+		return -EINVAL;
+
+	list_del(&context->list);
+	host1x_drm_context_free(context);
+
+	return 0;
+}
+
+static int tegra_get_syncpt(struct drm_device *drm, void *data,
+			    struct drm_file *file)
+{
+	struct drm_tegra_get_syncpt *req = data;
+	struct host1x_drm_file *fpriv = file->driver_priv;
+	struct host1x_drm_context *context;
+	struct host1x_client *client;
+
+	/* the handle is only dereferenced once this file is known to own it */
+	context = (struct host1x_drm_context *)(uintptr_t)req->context;
+	if (!host1x_drm_file_owns_context(fpriv, context))
+		return -ENODEV;
+
+	client = context->client;
+	if (req->index >= client->num_syncpts)
+		return -EINVAL;
+
+	req->id = host1x_syncpt_id(client->syncpts[req->index]);
+	return 0;
+}
+
+static int tegra_submit(struct drm_device *drm, void *data,
+			struct drm_file *file)
+{
+	struct drm_tegra_submit *args = data;
+	struct host1x_drm_file *fpriv = file->driver_priv;
+	struct host1x_drm_context *context =
+		(struct host1x_drm_context *)(uintptr_t)args->context;
+
+	if (!host1x_drm_file_owns_context(fpriv, context))
+		return -ENODEV;
+
+	return context->client->ops->submit(context, args, drm, file);
+}
+#endif
+
+static struct drm_ioctl_desc tegra_drm_ioctls[] = {
+#ifdef CONFIG_DRM_TEGRA_STAGING
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_WAIT, tegra_syncpt_wait, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_OPEN_CHANNEL, tegra_open_channel, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_CLOSE_CHANNEL, tegra_close_channel, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_get_syncpt, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_submit, DRM_UNLOCKED),
+#endif
+};
+
+static const struct file_operations tegra_drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = tegra_drm_mmap,
+	.poll = drm_poll,
+	.fasync = drm_fasync,
+	.read = drm_read,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = drm_compat_ioctl,
+#endif
+	.llseek = noop_llseek,
+};
+
+static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe)
+{
+	struct drm_crtc *candidate;
+
+	/* linear scan of the device's CRTC list for a matching pipe index */
+	list_for_each_entry(candidate, &drm->mode_config.crtc_list, head) {
+		if (to_tegra_dc(candidate)->pipe == pipe)
+			return candidate;
+	}
+
+	/* nothing on the list drives this pipe */
+	return NULL;
+}
+
+static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc)
+{
+	/* TODO: implement real hardware counter using syncpoints */
+	return drm_vblank_count(dev, crtc);
+}
+
+static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe)
+{
+	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
+
+	/* no CRTC was found for this pipe */
+	if (!crtc)
+		return -ENODEV;
+
+	tegra_dc_enable_vblank(to_tegra_dc(crtc));
+
+	return 0;
+}
+
+static void tegra_drm_disable_vblank(struct drm_device *drm, int pipe)
+{
+	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
+
+	/* pipes without a CRTC are silently ignored */
+	if (crtc)
+		tegra_dc_disable_vblank(to_tegra_dc(crtc));
+}
+
+static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file)
+{
+	struct host1x_drm_file *fpriv = file->driver_priv;
+	struct host1x_drm_context *context, *tmp;
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
+		tegra_dc_cancel_page_flip(crtc, file);
+
+	list_for_each_entry_safe(context, tmp, &fpriv->contexts, list)
+		host1x_drm_context_free(context);
+
+	kfree(fpriv);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int tegra_debugfs_framebuffers(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)s->private;
+	struct drm_device *drm = node->minor->dev;
+	struct drm_framebuffer *fb;
+
+	mutex_lock(&drm->mode_config.fb_lock);
+
+	list_for_each_entry(fb, &drm->mode_config.fb_list, head) {
+		seq_printf(s, "%3d: user size: %d x %d, depth %d, %d bpp, refcount %d\n",
+			   fb->base.id, fb->width, fb->height, fb->depth,
+			   fb->bits_per_pixel,
+			   atomic_read(&fb->refcount.refcount));
+	}
+
+	mutex_unlock(&drm->mode_config.fb_lock);
+
+	return 0;
+}
+
+static struct drm_info_list tegra_debugfs_list[] = {
+	{ "framebuffers", tegra_debugfs_framebuffers, 0 },
+};
+
+static int tegra_debugfs_init(struct drm_minor *minor)
+{
+	return drm_debugfs_create_files(tegra_debugfs_list,
+					ARRAY_SIZE(tegra_debugfs_list),
+					minor->debugfs_root, minor);
+}
+
+static void tegra_debugfs_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(tegra_debugfs_list,
+				 ARRAY_SIZE(tegra_debugfs_list), minor);
+}
+#endif
+
+struct drm_driver tegra_drm_driver = {
+	.driver_features = DRIVER_BUS_PLATFORM | DRIVER_MODESET | DRIVER_GEM,
+	.load = tegra_drm_load,
+	.unload = tegra_drm_unload,
+	.open = tegra_drm_open,
+	.preclose = tegra_drm_preclose,
+	.lastclose = tegra_drm_lastclose,
+
+	.get_vblank_counter = tegra_drm_get_vblank_counter,
+	.enable_vblank = tegra_drm_enable_vblank,
+	.disable_vblank = tegra_drm_disable_vblank,
+
+#if defined(CONFIG_DEBUG_FS)
+	.debugfs_init = tegra_debugfs_init,
+	.debugfs_cleanup = tegra_debugfs_cleanup,
+#endif
+
+	.gem_free_object = tegra_bo_free_object,
+	.gem_vm_ops = &tegra_bo_vm_ops,
+	.dumb_create = tegra_bo_dumb_create,
+	.dumb_map_offset = tegra_bo_dumb_map_offset,
+	.dumb_destroy = tegra_bo_dumb_destroy,
+
+	.ioctls = tegra_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
+	.fops = &tegra_drm_fops,
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/host1x/drm/drm.h
index 6dd75a2600eb..02ce020f2575 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/host1x/drm/drm.h
@@ -1,24 +1,36 @@
 /*
  * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
-#ifndef TEGRA_DRM_H
-#define TEGRA_DRM_H 1
+#ifndef HOST1X_DRM_H
+#define HOST1X_DRM_H 1
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
-#include <drm/drm_gem_cma_helper.h>
-#include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_fixed.h>
+#include <uapi/drm/tegra_drm.h>
 
-struct host1x {
+#include "host1x.h"
+
+struct tegra_fb {
+	struct drm_framebuffer base;
+	struct tegra_bo **planes;
+	unsigned int num_planes;
+};
+
+struct tegra_fbdev {
+	struct drm_fb_helper base;
+	struct tegra_fb *fb;
+};
+
+struct host1x_drm {
 	struct drm_device *drm;
 	struct device *dev;
 	void __iomem *regs;
@@ -33,31 +45,53 @@ struct host1x {
 	struct mutex clients_lock;
 	struct list_head clients;
 
-	struct drm_fbdev_cma *fbdev;
+	struct tegra_fbdev *fbdev;
 };
 
 struct host1x_client;
 
+struct host1x_drm_context {
+	struct host1x_client *client;
+	struct host1x_channel *channel;
+	struct list_head list;
+};
+
 struct host1x_client_ops {
 	int (*drm_init)(struct host1x_client *client, struct drm_device *drm);
 	int (*drm_exit)(struct host1x_client *client);
+	int (*open_channel)(struct host1x_client *client,
+			    struct host1x_drm_context *context);
+	void (*close_channel)(struct host1x_drm_context *context);
+	int (*submit)(struct host1x_drm_context *context,
+		      struct drm_tegra_submit *args, struct drm_device *drm,
+		      struct drm_file *file);
+};
+
+struct host1x_drm_file {
+	struct list_head contexts;
 };
 
 struct host1x_client {
-	struct host1x *host1x;
+	struct host1x_drm *host1x;
 	struct device *dev;
 
 	const struct host1x_client_ops *ops;
 
+	enum host1x_class class;
+	struct host1x_channel *channel;
+
+	struct host1x_syncpt **syncpts;
+	unsigned int num_syncpts;
+
 	struct list_head list;
 };
 
-extern int host1x_drm_init(struct host1x *host1x, struct drm_device *drm);
-extern int host1x_drm_exit(struct host1x *host1x);
+extern int host1x_drm_init(struct host1x_drm *host1x, struct drm_device *drm);
+extern int host1x_drm_exit(struct host1x_drm *host1x);
 
-extern int host1x_register_client(struct host1x *host1x,
+extern int host1x_register_client(struct host1x_drm *host1x,
 				  struct host1x_client *client);
-extern int host1x_unregister_client(struct host1x *host1x,
+extern int host1x_unregister_client(struct host1x_drm *host1x,
 				    struct host1x_client *client);
 
 struct tegra_output;
@@ -66,7 +100,7 @@ struct tegra_dc {
 	struct host1x_client client;
 	spinlock_t lock;
 
-	struct host1x *host1x;
+	struct host1x_drm *host1x;
 	struct device *dev;
 
 	struct drm_crtc base;
@@ -226,12 +260,12 @@ extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output
 extern int tegra_output_exit(struct tegra_output *output);
 
 /* from fb.c */
+struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
+				    unsigned int index);
 extern int tegra_drm_fb_init(struct drm_device *drm);
 extern void tegra_drm_fb_exit(struct drm_device *drm);
+extern void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
 
-extern struct platform_driver tegra_host1x_driver;
-extern struct platform_driver tegra_hdmi_driver;
-extern struct platform_driver tegra_dc_driver;
 extern struct drm_driver tegra_drm_driver;
 
-#endif /* TEGRA_DRM_H */
+#endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/host1x/drm/fb.c b/drivers/gpu/host1x/drm/fb.c
new file mode 100644
index 000000000000..979a3e32b78b
--- /dev/null
+++ b/drivers/gpu/host1x/drm/fb.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2012-2013 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Based on the KMS/FB CMA helpers
+ *   Copyright (C) 2012 Analog Device Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+
+#include "drm.h"
+#include "gem.h"
+
+static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb)
+{
+	return container_of(fb, struct tegra_fb, base);
+}
+
+static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
+{
+	return container_of(helper, struct tegra_fbdev, base);
+}
+
+struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
+				    unsigned int index)
+{
+	unsigned int count = drm_format_num_planes(framebuffer->pixel_format);
+
+	/* indices past the pixel format's plane count yield NULL */
+	if (index >= count)
+		return NULL;
+	return to_tegra_fb(framebuffer)->planes[index];
+}
+
+static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct tegra_fb *fb = to_tegra_fb(framebuffer);
+	unsigned int i;
+
+	for (i = 0; i < fb->num_planes; i++) {
+		struct tegra_bo *bo = fb->planes[i];
+
+		if (bo)
+			drm_gem_object_unreference_unlocked(&bo->gem);
+	}
+
+	drm_framebuffer_cleanup(framebuffer);
+	kfree(fb->planes);
+	kfree(fb);
+}
+
+static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer,
+				  struct drm_file *file, unsigned int *handle)
+{
+	struct tegra_fb *fb = to_tegra_fb(framebuffer);
+
+	return drm_gem_handle_create(file, &fb->planes[0]->gem, handle);
+}
+
+static struct drm_framebuffer_funcs tegra_fb_funcs = {
+	.destroy = tegra_fb_destroy,
+	.create_handle = tegra_fb_create_handle,
+};
+
+static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
+				       struct drm_mode_fb_cmd2 *mode_cmd,
+				       struct tegra_bo **planes,
+				       unsigned int num_planes)
+{
+	struct tegra_fb *fb;
+	unsigned int i;
+	int err;
+
+	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
+	if (!fb)
+		return ERR_PTR(-ENOMEM);
+
+	fb->planes = kcalloc(num_planes, sizeof(*fb->planes), GFP_KERNEL);
+	if (!fb->planes) {
+		kfree(fb); /* don't leak the half-built framebuffer */
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb->num_planes = num_planes;
+	for (i = 0; i < fb->num_planes; i++)
+		fb->planes[i] = planes[i];
+
+	drm_helper_mode_fill_fb_struct(&fb->base, mode_cmd);
+	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);
+	if (err < 0) {
+		dev_err(drm->dev, "failed to initialize framebuffer: %d\n",
+			err);
+		kfree(fb->planes);
+		kfree(fb);
+		return ERR_PTR(err);
+	}
+
+	return fb;
+}
+
+static struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
+					       struct drm_file *file,
+					       struct drm_mode_fb_cmd2 *cmd)
+{
+	unsigned int hsub, vsub, i;
+	struct tegra_bo *planes[4];
+	struct drm_gem_object *gem;
+	struct tegra_fb *fb;
+	int err;
+
+	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
+	vsub = drm_format_vert_chroma_subsampling(cmd->pixel_format);
+
+	for (i = 0; i < drm_format_num_planes(cmd->pixel_format); i++) {
+		unsigned int width = cmd->width / (i ? hsub : 1);
+		unsigned int height = cmd->height / (i ? vsub : 1);
+		unsigned int size, bpp;
+
+		gem = drm_gem_object_lookup(drm, file, cmd->handles[i]);
+		if (!gem) {
+			err = -ENXIO;
+			goto unreference;
+		}
+
+		bpp = drm_format_plane_cpp(cmd->pixel_format, i);
+		size = (height - 1) * cmd->pitches[i] +
+		       width * bpp + cmd->offsets[i];
+
+		if (gem->size < size) {
+			/* not yet tracked in planes[], so release it here */
+			drm_gem_object_unreference_unlocked(gem);
+			err = -EINVAL;
+			goto unreference;
+		}
+		planes[i] = to_tegra_bo(gem);
+	}
+
+	fb = tegra_fb_alloc(drm, cmd, planes, i);
+	if (IS_ERR(fb)) {
+		err = PTR_ERR(fb);
+		goto unreference;
+	}
+
+	return &fb->base;
+
+unreference:
+	while (i--)
+		drm_gem_object_unreference_unlocked(&planes[i]->gem);
+
+	return ERR_PTR(err);
+}
+
+static struct fb_ops tegra_fb_ops = {
+	.owner = THIS_MODULE,
+	.fb_fillrect = sys_fillrect,
+	.fb_copyarea = sys_copyarea,
+	.fb_imageblit = sys_imageblit,
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_setcmap = drm_fb_helper_setcmap,
+};
+
+static int tegra_fbdev_probe(struct drm_fb_helper *helper,
+			     struct drm_fb_helper_surface_size *sizes)
+{
+	struct tegra_fbdev *fbdev = to_tegra_fbdev(helper);
+	struct drm_device *drm = helper->dev;
+	struct drm_mode_fb_cmd2 cmd = { 0 };
+	unsigned int bytes_per_pixel;
+	struct drm_framebuffer *fb;
+	unsigned long offset;
+	struct fb_info *info;
+	struct tegra_bo *bo;
+	size_t size;
+	int err;
+
+	bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
+
+	cmd.width = sizes->surface_width;
+	cmd.height = sizes->surface_height;
+	cmd.pitches[0] = sizes->surface_width * bytes_per_pixel;
+	cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+						     sizes->surface_depth);
+
+	size = cmd.pitches[0] * cmd.height;
+
+	bo = tegra_bo_create(drm, size);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	info = framebuffer_alloc(0, drm->dev);
+	if (!info) {
+		dev_err(drm->dev, "failed to allocate framebuffer info\n");
+		tegra_bo_free_object(&bo->gem);
+		return -ENOMEM;
+	}
+
+	fbdev->fb = tegra_fb_alloc(drm, &cmd, &bo, 1);
+	if (IS_ERR(fbdev->fb)) {
+		dev_err(drm->dev, "failed to allocate DRM framebuffer\n");
+		err = PTR_ERR(fbdev->fb);
+		/* the framebuffer never took over the buffer object */
+		tegra_bo_free_object(&bo->gem);
+		goto release;
+	}
+
+	fb = &fbdev->fb->base;
+	helper->fb = fb;
+	helper->fbdev = info;
+	info->par = helper;
+	info->flags = FBINFO_FLAG_DEFAULT;
+	info->fbops = &tegra_fb_ops;
+
+	err = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (err < 0) {
+		dev_err(drm->dev, "failed to allocate color map: %d\n", err);
+		goto destroy;
+	}
+
+	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
+	drm_fb_helper_fill_var(info, helper, fb->width, fb->height);
+	offset = info->var.xoffset * bytes_per_pixel +
+		 info->var.yoffset * fb->pitches[0];
+
+	drm->mode_config.fb_base = (resource_size_t)bo->paddr;
+	info->screen_base = bo->vaddr + offset;
+	info->screen_size = size;
+	info->fix.smem_start = (unsigned long)(bo->paddr + offset);
+	info->fix.smem_len = size;
+
+	return 0;
+
+destroy:
+	drm_framebuffer_unregister_private(fb);
+	tegra_fb_destroy(fb);
+release:
+	framebuffer_release(info);
+	return err;
+}
+
+static struct drm_fb_helper_funcs tegra_fb_helper_funcs = {
+	.fb_probe = tegra_fbdev_probe,
+};
+
+static struct tegra_fbdev *tegra_fbdev_create(struct drm_device *drm,
+					      unsigned int preferred_bpp,
+					      unsigned int num_crtc,
+					      unsigned int max_connectors)
+{
+	struct drm_fb_helper *helper;
+	struct tegra_fbdev *fbdev;
+	int err;
+
+	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev) {
+		dev_err(drm->dev, "failed to allocate DRM fbdev\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fbdev->base.funcs = &tegra_fb_helper_funcs;
+	helper = &fbdev->base;
+
+	err = drm_fb_helper_init(drm, &fbdev->base, num_crtc, max_connectors);
+	if (err < 0) {
+		dev_err(drm->dev, "failed to initialize DRM FB helper\n");
+		goto free;
+	}
+
+	err = drm_fb_helper_single_add_all_connectors(&fbdev->base);
+	if (err < 0) {
+		dev_err(drm->dev, "failed to add connectors\n");
+		goto fini;
+	}
+
+	drm_helper_disable_unused_functions(drm);
+
+	err = drm_fb_helper_initial_config(&fbdev->base, preferred_bpp);
+	if (err < 0) {
+		dev_err(drm->dev, "failed to set initial configuration\n");
+		goto fini;
+	}
+
+	return fbdev;
+
+fini:
+	drm_fb_helper_fini(&fbdev->base);
+free:
+	kfree(fbdev);
+	return ERR_PTR(err);
+}
+
+static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
+{
+	struct fb_info *info = fbdev->base.fbdev;
+
+	if (info) {
+		int err;
+
+		err = unregister_framebuffer(info);
+		if (err < 0)
+			DRM_DEBUG_KMS("failed to unregister framebuffer\n");
+
+		if (info->cmap.len)
+			fb_dealloc_cmap(&info->cmap);
+
+		framebuffer_release(info);
+	}
+
+	if (fbdev->fb) {
+		drm_framebuffer_unregister_private(&fbdev->fb->base);
+		tegra_fb_destroy(&fbdev->fb->base);
+	}
+
+	drm_fb_helper_fini(&fbdev->base);
+	kfree(fbdev);
+}
+
+static void tegra_fb_output_poll_changed(struct drm_device *drm)
+{
+	struct host1x_drm *host1x = drm->dev_private;
+
+	if (host1x->fbdev)
+		drm_fb_helper_hotplug_event(&host1x->fbdev->base);
+}
+
+static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
+	.fb_create = tegra_fb_create,
+	.output_poll_changed = tegra_fb_output_poll_changed,
+};
+
+int tegra_drm_fb_init(struct drm_device *drm)
+{
+	struct host1x_drm *host1x = drm->dev_private;
+	struct tegra_fbdev *fbdev;
+
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+
+	drm->mode_config.max_width = 4096;
+	drm->mode_config.max_height = 4096;
+
+	drm->mode_config.funcs = &tegra_drm_mode_funcs;
+
+	fbdev = tegra_fbdev_create(drm, 32, drm->mode_config.num_crtc,
+				   drm->mode_config.num_connector);
+	if (IS_ERR(fbdev))
+		return PTR_ERR(fbdev);
+
+	host1x->fbdev = fbdev;
+
+	return 0;
+}
+
+void tegra_drm_fb_exit(struct drm_device *drm)
+{
+	struct host1x_drm *host1x = drm->dev_private;
+
+	tegra_fbdev_free(host1x->fbdev);
+}
+
+void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
+{
+	if (!fbdev)
+		return; /* nothing to restore without an fbdev */
+	drm_modeset_lock_all(fbdev->base.dev);
+	drm_fb_helper_restore_fbdev_mode(&fbdev->base);
+	drm_modeset_unlock_all(fbdev->base.dev);
+}
diff --git a/drivers/gpu/host1x/drm/gem.c b/drivers/gpu/host1x/drm/gem.c
new file mode 100644
index 000000000000..c5e9a9b494c2
--- /dev/null
+++ b/drivers/gpu/host1x/drm/gem.c
@@ -0,0 +1,270 @@
+/*
+ * NVIDIA Tegra DRM GEM helper functions
+ *
+ * Copyright (C) 2012 Sascha Hauer, Pengutronix
+ * Copyright (C) 2013 NVIDIA CORPORATION, All rights reserved.
+ *
+ * Based on the GEM/CMA helpers
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/export.h>
+#include <linux/dma-mapping.h>
+
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "gem.h"
+
+static inline struct tegra_bo *host1x_to_drm_bo(struct host1x_bo *bo)
+{
+	return container_of(bo, struct tegra_bo, base);
+}
+
+static void tegra_bo_put(struct host1x_bo *bo)
+{
+	struct tegra_bo *obj = host1x_to_drm_bo(bo);
+	struct drm_device *drm = obj->gem.dev;
+
+	mutex_lock(&drm->struct_mutex);
+	drm_gem_object_unreference(&obj->gem);
+	mutex_unlock(&drm->struct_mutex);
+}
+
+static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt)
+{
+	struct tegra_bo *obj = host1x_to_drm_bo(bo);
+
+	return obj->paddr;
+}
+
+static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+{
+}
+
+static void *tegra_bo_mmap(struct host1x_bo *bo)
+{
+	struct tegra_bo *obj = host1x_to_drm_bo(bo);
+
+	return obj->vaddr;
+}
+
+static void tegra_bo_munmap(struct host1x_bo *bo, void *addr)
+{
+}
+
+static void *tegra_bo_kmap(struct host1x_bo *bo, unsigned int page)
+{
+	struct tegra_bo *obj = host1x_to_drm_bo(bo);
+
+	return obj->vaddr + page * PAGE_SIZE;
+}
+
+static void tegra_bo_kunmap(struct host1x_bo *bo, unsigned int page,
+			    void *addr)
+{
+}
+
+static struct host1x_bo *tegra_bo_get(struct host1x_bo *bo)
+{
+	struct tegra_bo *obj = host1x_to_drm_bo(bo);
+	struct drm_device *drm = obj->gem.dev;
+
+	mutex_lock(&drm->struct_mutex);
+	drm_gem_object_reference(&obj->gem);
+	mutex_unlock(&drm->struct_mutex);
+
+	return bo;
+}
+
+const struct host1x_bo_ops tegra_bo_ops = {
+	.get = tegra_bo_get,
+	.put = tegra_bo_put,
+	.pin = tegra_bo_pin,
+	.unpin = tegra_bo_unpin,
+	.mmap = tegra_bo_mmap,
+	.munmap = tegra_bo_munmap,
+	.kmap = tegra_bo_kmap,
+	.kunmap = tegra_bo_kunmap,
+};
+
+static void tegra_bo_destroy(struct drm_device *drm, struct tegra_bo *bo)
+{
+	dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
+}
+
+unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo)
+{
+	return (unsigned int)bo->gem.map_list.hash.key << PAGE_SHIFT;
+}
+
+/* Allocate a GEM bo in write-combined DMA memory; ERR_PTR() on failure. */
+struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size)
+{
+	struct tegra_bo *bo;
+	int err;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	host1x_bo_init(&bo->base, &tegra_bo_ops);
+	size = round_up(size, PAGE_SIZE);
+
+	bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
+					   GFP_KERNEL | __GFP_NOWARN);
+	if (!bo->vaddr) {
+		dev_err(drm->dev, "failed to allocate buffer with size %u\n",
+			size);
+		err = -ENOMEM;
+		goto err_dma;
+	}
+
+	err = drm_gem_object_init(drm, &bo->gem, size);
+	if (err)
+		goto err_init;
+
+	err = drm_gem_create_mmap_offset(&bo->gem);
+	if (err)
+		goto err_mmap;
+
+	return bo;
+
+err_mmap:
+	drm_gem_object_release(&bo->gem);
+err_init:
+	/* bo->gem.size may be unset if GEM init failed: free with local size */
+	dma_free_writecombine(drm->dev, size, bo->vaddr, bo->paddr);
+err_dma:
+	kfree(bo);
+	return ERR_PTR(err);
+}
+
+struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
+					    struct drm_device *drm,
+					    unsigned int size,
+					    unsigned int *handle)
+{
+	struct tegra_bo *bo;
+	int ret;
+
+	bo = tegra_bo_create(drm, size);
+	if (IS_ERR(bo))
+		return bo;
+
+	ret = drm_gem_handle_create(file, &bo->gem, handle);
+	if (ret)
+		goto err;
+
+	drm_gem_object_unreference_unlocked(&bo->gem);
+
+	return bo;
+
+err:
+	tegra_bo_free_object(&bo->gem);
+	return ERR_PTR(ret);
+}
+
+void tegra_bo_free_object(struct drm_gem_object *gem)
+{
+	struct tegra_bo *bo = to_tegra_bo(gem);
+
+	if (gem->map_list.map)
+		drm_gem_free_mmap_offset(gem);
+
+	drm_gem_object_release(gem);
+	tegra_bo_destroy(gem->dev, bo);
+
+	kfree(bo);
+}
+
+int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
+			 struct drm_mode_create_dumb *args)
+{
+	int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+	struct tegra_bo *bo;
+
+	if (args->pitch < min_pitch)
+		args->pitch = min_pitch;
+
+	if (args->size < args->pitch * args->height)
+		args->size = args->pitch * args->height;
+
+	bo = tegra_bo_create_with_handle(file, drm, args->size,
+					    &args->handle);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	return 0;
+}
+
+int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
+			     uint32_t handle, uint64_t *offset)
+{
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+
+	mutex_lock(&drm->struct_mutex);
+
+	gem = drm_gem_object_lookup(drm, file, handle);
+	if (!gem) {
+		dev_err(drm->dev, "failed to lookup GEM object\n");
+		mutex_unlock(&drm->struct_mutex);
+		return -EINVAL;
+	}
+
+	bo = to_tegra_bo(gem);
+
+	*offset = tegra_bo_get_mmap_offset(bo);
+
+	drm_gem_object_unreference(gem);
+
+	mutex_unlock(&drm->struct_mutex);
+
+	return 0;
+}
+
+const struct vm_operations_struct tegra_bo_vm_ops = {
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+	int ret;
+
+	ret = drm_gem_mmap(file, vma);
+	if (ret)
+		return ret;
+
+	gem = vma->vm_private_data;
+	bo = to_tegra_bo(gem);
+
+	ret = remap_pfn_range(vma, vma->vm_start, bo->paddr >> PAGE_SHIFT,
+			      vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	if (ret)
+		drm_gem_vm_close(vma);
+
+	return ret;
+}
+
+int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
+			  unsigned int handle)
+{
+	return drm_gem_handle_delete(file, handle);
+}
diff --git a/drivers/gpu/host1x/drm/gem.h b/drivers/gpu/host1x/drm/gem.h
new file mode 100644
index 000000000000..34de2b486eb7
--- /dev/null
+++ b/drivers/gpu/host1x/drm/gem.h
@@ -0,0 +1,59 @@
+/*
+ * Tegra host1x GEM implementation
+ *
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_GEM_H
+#define __HOST1X_GEM_H
+
+#include <drm/drm.h>
+#include <drm/drmP.h>
+
+#include "host1x_bo.h"
+
+struct tegra_bo {
+	struct drm_gem_object gem;
+	struct host1x_bo base;
+	dma_addr_t paddr;
+	void *vaddr;
+};
+
+static inline struct tegra_bo *to_tegra_bo(struct drm_gem_object *gem)
+{
+	return container_of(gem, struct tegra_bo, gem);
+}
+
+extern const struct host1x_bo_ops tegra_bo_ops;
+
+struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size);
+struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
+					    struct drm_device *drm,
+					    unsigned int size,
+					    unsigned int *handle);
+void tegra_bo_free_object(struct drm_gem_object *gem);
+unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo);
+int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
+			 struct drm_mode_create_dumb *args);
+int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
+			     uint32_t handle, uint64_t *offset);
+int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
+			  unsigned int handle);
+
+int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma);
+
+extern const struct vm_operations_struct tegra_bo_vm_ops;
+
+#endif
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
new file mode 100644
index 000000000000..6a45ae090ee7
--- /dev/null
+++ b/drivers/gpu/host1x/drm/gr2d.c
@@ -0,0 +1,339 @@
+/*
+ * drivers/video/tegra/host/gr2d/gr2d.c
+ *
+ * Tegra Graphics 2D
+ *
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+
+#include "channel.h"
+#include "drm.h"
+#include "gem.h"
+#include "job.h"
+#include "host1x.h"
+#include "host1x_bo.h"
+#include "host1x_client.h"
+#include "syncpt.h"
+
+struct gr2d {
+	struct host1x_client client;
+	struct clk *clk;
+	struct host1x_channel *channel;
+	unsigned long *addr_regs;
+};
+
+static inline struct gr2d *to_gr2d(struct host1x_client *client)
+{
+	return container_of(client, struct gr2d, client);
+}
+
+static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 reg);
+
+static int gr2d_client_init(struct host1x_client *client,
+			    struct drm_device *drm)
+{
+	return 0;
+}
+
+static int gr2d_client_exit(struct host1x_client *client)
+{
+	return 0;
+}
+
+static int gr2d_open_channel(struct host1x_client *client,
+			     struct host1x_drm_context *context)
+{
+	struct gr2d *gr2d = to_gr2d(client);
+
+	context->channel = host1x_channel_get(gr2d->channel);
+
+	if (!context->channel)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void gr2d_close_channel(struct host1x_drm_context *context)
+{
+	host1x_channel_put(context->channel);
+}
+
+/* Look up the host1x bo behind a GEM handle; NULL if the lookup fails. */
+static struct host1x_bo *host1x_bo_lookup(struct drm_device *drm,
+					  struct drm_file *file,
+					  u32 handle)
+{
+	struct drm_gem_object *gem;
+
+	gem = drm_gem_object_lookup(drm, file, handle);
+	if (!gem)
+		return NULL;
+
+	/* drop the lookup reference; the caller receives a borrowed pointer */
+	mutex_lock(&drm->struct_mutex);
+	drm_gem_object_unreference(gem);
+	mutex_unlock(&drm->struct_mutex);
+
+	return &to_tegra_bo(gem)->base;
+}
+
+/* Queue a userspace submit as a host1x job; returns 0 or an error code. */
+static int gr2d_submit(struct host1x_drm_context *context,
+		       struct drm_tegra_submit *args, struct drm_device *drm,
+		       struct drm_file *file)
+{
+	struct host1x_job *job;
+	unsigned int num_relocs = args->num_relocs;
+	struct drm_tegra_cmdbuf __user *cmdbufs =
+		(void __user *)(uintptr_t)args->cmdbufs;
+	struct drm_tegra_reloc __user *relocs =
+		(void __user *)(uintptr_t)args->relocs;
+	struct drm_tegra_waitchk __user *waitchks =
+		(void __user *)(uintptr_t)args->waitchks;
+	struct drm_tegra_syncpt syncpt;
+	unsigned int i;
+	int err;
+
+	/* We don't yet support other than one syncpt_incr struct per submit */
+	if (args->num_syncpts != 1)
+		return -EINVAL;
+
+	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
+			       args->num_relocs, args->num_waitchks);
+	if (!job)
+		return -ENOMEM;
+
+	job->num_relocs = args->num_relocs;
+	job->num_waitchk = args->num_waitchks;
+	job->client = (u32)args->context;
+	job->class = context->client->class;
+	job->serialize = true;
+
+	for (i = 0; i < args->num_cmdbufs; i++) {
+		struct drm_tegra_cmdbuf cmdbuf;
+		struct host1x_bo *bo;
+
+		if (copy_from_user(&cmdbuf, &cmdbufs[i], sizeof(cmdbuf))) {
+			err = -EFAULT;
+			goto fail;
+		}
+
+		bo = host1x_bo_lookup(drm, file, cmdbuf.handle);
+		if (!bo) {
+			err = -ENOENT;
+			goto fail;
+		}
+
+		host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset);
+	}
+
+	if (copy_from_user(job->relocarray, relocs,
+			   sizeof(*relocs) * num_relocs)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	while (num_relocs--) {
+		struct host1x_reloc *reloc = &job->relocarray[num_relocs];
+
+		/* these fields still hold userspace GEM handles */
+		reloc->cmdbuf = host1x_bo_lookup(drm, file, (u32)reloc->cmdbuf);
+		reloc->target = host1x_bo_lookup(drm, file, (u32)reloc->target);
+		if (!reloc->target || !reloc->cmdbuf) {
+			err = -ENOENT;
+			goto fail;
+		}
+	}
+
+	if (copy_from_user(job->waitchk, waitchks,
+			   sizeof(*waitchks) * args->num_waitchks)) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
+			   sizeof(syncpt))) {
+		err = -EFAULT;
+		goto fail;
+	}
+
+	job->syncpt_id = syncpt.id;
+	job->syncpt_incrs = syncpt.incrs;
+	job->is_addr_reg = gr2d_is_addr_reg;
+	job->timeout = 10000;
+	if (args->timeout && args->timeout < 10000)
+		job->timeout = args->timeout;
+
+	err = host1x_job_pin(job, context->client->dev);
+	if (err)
+		goto fail;
+
+	err = host1x_job_submit(job);
+	if (err)
+		goto fail_submit;
+
+	args->fence = job->syncpt_end;
+	host1x_job_put(job);
+	return 0;
+
+fail_submit:
+	host1x_job_unpin(job);
+fail:
+	host1x_job_put(job);
+	return err;
+}
+
+static struct host1x_client_ops gr2d_client_ops = {
+	.drm_init = gr2d_client_init,
+	.drm_exit = gr2d_client_exit,
+	.open_channel = gr2d_open_channel,
+	.close_channel = gr2d_close_channel,
+	.submit = gr2d_submit,
+};
+
+static void gr2d_init_addr_reg_map(struct device *dev, struct gr2d *gr2d)
+{
+	const u32 gr2d_addr_regs[] = {0x1a, 0x1b, 0x26, 0x2b, 0x2c, 0x2d, 0x31,
+				      0x32, 0x48, 0x49, 0x4a, 0x4b, 0x4c};
+	unsigned long *bitmap;
+	int i;
+
+	bitmap = devm_kzalloc(dev, DIV_ROUND_UP(256, BITS_PER_BYTE),
+			      GFP_KERNEL);
+
+	for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); ++i) {
+		u32 reg = gr2d_addr_regs[i];
+		bitmap[BIT_WORD(reg)] |= BIT_MASK(reg);
+	}
+
+	gr2d->addr_regs = bitmap;
+}
+
+/* Return 1 if @reg of class @class is an address register, 0 otherwise. */
+static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 reg)
+{
+	struct gr2d *gr2d = dev_get_drvdata(dev);
+
+	switch (class) {
+	case HOST1X_CLASS_HOST1X:
+		return reg == 0x2b;
+	case HOST1X_CLASS_GR2D:
+	case HOST1X_CLASS_GR2D_SB:
+		reg &= 0xff;
+		return !!(gr2d->addr_regs[BIT_WORD(reg)] & BIT_MASK(reg));
+	default:
+		return 0;
+	}
+}
+
+static const struct of_device_id gr2d_match[] = {
+	{ .compatible = "nvidia,tegra30-gr2d" },
+	{ .compatible = "nvidia,tegra20-gr2d" },
+	{ },
+};
+
+static int gr2d_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct host1x_drm *host1x = host1x_get_drm_data(dev->parent);
+	struct host1x_syncpt **syncpts;
+	struct gr2d *gr2d;
+	int err;
+
+	gr2d = devm_kzalloc(dev, sizeof(*gr2d), GFP_KERNEL);
+	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
+	if (!gr2d || !syncpts)
+		return -ENOMEM;
+
+	gr2d->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(gr2d->clk)) {
+		dev_err(dev, "cannot get clock\n");
+		return PTR_ERR(gr2d->clk);
+	}
+	err = clk_prepare_enable(gr2d->clk);
+	if (err) {
+		dev_err(dev, "cannot turn on clock\n");
+		return err;
+	}
+	/* Failures below must release the clock, channel and syncpt again. */
+	err = -ENOMEM;
+	gr2d->channel = host1x_channel_request(dev);
+	if (!gr2d->channel)
+		goto disable_clk;
+	*syncpts = host1x_syncpt_request(dev, 0);
+	if (!*syncpts)
+		goto free_channel;
+
+	gr2d->client.ops = &gr2d_client_ops;
+	gr2d->client.dev = dev;
+	gr2d->client.class = HOST1X_CLASS_GR2D;
+	gr2d->client.syncpts = syncpts;
+	gr2d->client.num_syncpts = 1;
+
+	err = host1x_register_client(host1x, &gr2d->client);
+	if (err < 0) {
+		dev_err(dev, "failed to register host1x client: %d\n", err);
+		goto free_syncpt;
+	}
+
+	gr2d_init_addr_reg_map(dev, gr2d);
+	platform_set_drvdata(pdev, gr2d);
+	return 0;
+
+free_syncpt:
+	host1x_syncpt_free(*syncpts);
+free_channel:
+	host1x_channel_free(gr2d->channel);
+disable_clk:
+	clk_disable_unprepare(gr2d->clk);
+	return err;
+}
+
+static int __exit gr2d_remove(struct platform_device *pdev)
+{
+	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
+	struct gr2d *gr2d = platform_get_drvdata(pdev);
+	unsigned int i;
+	int err;
+
+	err = host1x_unregister_client(host1x, &gr2d->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to unregister client: %d\n", err);
+		return err;
+	}
+
+	for (i = 0; i < gr2d->client.num_syncpts; i++)
+		host1x_syncpt_free(gr2d->client.syncpts[i]);
+
+	host1x_channel_free(gr2d->channel);
+	clk_disable_unprepare(gr2d->clk);
+
+	return 0;
+}
+
+struct platform_driver tegra_gr2d_driver = {
+	.probe = gr2d_probe,
+	.remove = __exit_p(gr2d_remove),
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "gr2d",
+		.of_match_table = gr2d_match,
+	}
+};
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
index bb747f6cd1a4..01097da09f7f 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/host1x/drm/hdmi.c
@@ -22,6 +22,7 @@
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
+#include "host1x_client.h"
 
 struct tegra_hdmi {
 	struct host1x_client client;
@@ -1189,7 +1190,7 @@ static const struct host1x_client_ops hdmi_client_ops = {
 
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
-	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_hdmi *hdmi;
 	struct resource *regs;
 	int err;
@@ -1278,7 +1279,7 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 static int tegra_hdmi_remove(struct platform_device *pdev)
 {
-	struct host1x *host1x = dev_get_drvdata(pdev->dev.parent);
+	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
 	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 	int err;
 
diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/host1x/drm/hdmi.h
index 52ac36e08ccb..52ac36e08ccb 100644
--- a/drivers/gpu/drm/tegra/hdmi.h
+++ b/drivers/gpu/host1x/drm/hdmi.h
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/host1x/drm/output.c
index 8140fc6c34d8..8140fc6c34d8 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/host1x/drm/output.c
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/host1x/drm/rgb.c
index ed4416f20260..ed4416f20260 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/host1x/drm/rgb.c
diff --git a/drivers/gpu/host1x/host1x.h b/drivers/gpu/host1x/host1x.h
new file mode 100644
index 000000000000..a2bc1e65e972
--- /dev/null
+++ b/drivers/gpu/host1x/host1x.h
@@ -0,0 +1,30 @@
+/*
+ * Tegra host1x driver
+ *
+ * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LINUX_HOST1X_H
+#define __LINUX_HOST1X_H
+
+enum host1x_class {
+	HOST1X_CLASS_HOST1X	= 0x1,
+	HOST1X_CLASS_GR2D	= 0x51,
+	HOST1X_CLASS_GR2D_SB    = 0x52
+};
+
+#endif
diff --git a/drivers/gpu/host1x/host1x_bo.h b/drivers/gpu/host1x/host1x_bo.h
new file mode 100644
index 000000000000..4c1f10bd773d
--- /dev/null
+++ b/drivers/gpu/host1x/host1x_bo.h
@@ -0,0 +1,87 @@
+/*
+ * Tegra host1x Memory Management Abstraction header
+ *
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _HOST1X_BO_H
+#define _HOST1X_BO_H
+
+struct host1x_bo;
+
+struct host1x_bo_ops {
+	struct host1x_bo *(*get)(struct host1x_bo *bo);
+	void (*put)(struct host1x_bo *bo);
+	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
+	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
+	void *(*mmap)(struct host1x_bo *bo);
+	void (*munmap)(struct host1x_bo *bo, void *addr);
+	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
+	void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr);
+};
+
+struct host1x_bo {
+	const struct host1x_bo_ops *ops;
+};
+
+static inline void host1x_bo_init(struct host1x_bo *bo,
+				  const struct host1x_bo_ops *ops)
+{
+	bo->ops = ops;
+}
+
+static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo)
+{
+	return bo->ops->get(bo);
+}
+
+static inline void host1x_bo_put(struct host1x_bo *bo)
+{
+	bo->ops->put(bo);
+}
+
+static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
+				       struct sg_table **sgt)
+{
+	return bo->ops->pin(bo, sgt);
+}
+
+static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+{
+	bo->ops->unpin(bo, sgt);
+}
+
+static inline void *host1x_bo_mmap(struct host1x_bo *bo)
+{
+	return bo->ops->mmap(bo);
+}
+
+static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
+{
+	bo->ops->munmap(bo, addr);
+}
+
+static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum)
+{
+	return bo->ops->kmap(bo, pagenum);
+}
+
+static inline void host1x_bo_kunmap(struct host1x_bo *bo,
+				    unsigned int pagenum, void *addr)
+{
+	bo->ops->kunmap(bo, pagenum, addr);
+}
+
+#endif
diff --git a/drivers/gpu/host1x/host1x_client.h b/drivers/gpu/host1x/host1x_client.h
new file mode 100644
index 000000000000..9b85f10f4a44
--- /dev/null
+++ b/drivers/gpu/host1x/host1x_client.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_CLIENT_H
+#define HOST1X_CLIENT_H
+
+struct device;
+struct platform_device;
+
+#ifdef CONFIG_DRM_TEGRA
+int host1x_drm_alloc(struct platform_device *pdev);
+#else
+static inline int host1x_drm_alloc(struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+void host1x_set_drm_data(struct device *dev, void *data);
+void *host1x_get_drm_data(struct device *dev);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/Makefile b/drivers/gpu/host1x/hw/Makefile
new file mode 100644
index 000000000000..9b50863a2236
--- /dev/null
+++ b/drivers/gpu/host1x/hw/Makefile
@@ -0,0 +1,6 @@
+ccflags-y = -Idrivers/gpu/host1x
+
+host1x-hw-objs  = \
+	host1x01.o
+
+obj-$(CONFIG_TEGRA_HOST1X) += host1x-hw.o
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
new file mode 100644
index 000000000000..590b69d91dab
--- /dev/null
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -0,0 +1,326 @@
+/*
+ * Tegra host1x Command DMA
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+
+#include "cdma.h"
+#include "channel.h"
+#include "dev.h"
+#include "debug.h"
+
+/*
+ * Put the restart at the end of pushbuffer memory
+ */
+static void push_buffer_init(struct push_buffer *pb)
+{
+	*(pb->mapped + (pb->size_bytes >> 2)) = host1x_opcode_restart(0);
+}
+
+/*
+ * Bump the timed-out buffer's syncpt from the CPU, then NOP out its slots.
+ */
+static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
+				u32 syncpt_incrs, u32 syncval, u32 nr_slots)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+	u32 i;
+
+	for (i = 0; i < syncpt_incrs; i++)
+		host1x_syncpt_cpu_incr(cdma->timeout.syncpt);
+
+	/* after CPU incr, ensure shadow is up to date */
+	host1x_syncpt_load(cdma->timeout.syncpt);
+
+	/* NOP both words of each slot; getptr is a byte offset that wraps */
+	while (nr_slots--) {
+		u32 *p = (u32 *)((uintptr_t)pb->mapped + getptr);
+		*(p++) = HOST1X_OPCODE_NOP;
+		*(p++) = HOST1X_OPCODE_NOP;
+		dev_dbg(host1x->dev, "%s: NOP at 0x%x\n", __func__,
+			pb->phys + getptr);
+		getptr = (getptr + 8) & (pb->size_bytes - 1);
+	}
+	wmb();
+}
+
+/*
+ * Start channel DMA
+ */
+static void cdma_start(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* set base, put and end pointer */
+	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+	host1x_ch_writel(ch, cdma->push_buffer.phys +
+			 cdma->push_buffer.size_bytes + 4,
+			 HOST1X_CHANNEL_DMAEND);
+
+	/* reset GET */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Similar to cdma_start(), but rather than starting from an idle
+ * state (where DMA GET is set to DMA PUT), on a timeout we restore
+ * DMA GET from an explicit value (so DMA may again be pending).
+ */
+static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->running)
+		return;
+
+	cdma->last_pos = cdma->push_buffer.pos;
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	/* set base, end pointer (all of memory) */
+	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, cdma->push_buffer.phys +
+			 cdma->push_buffer.size_bytes,
+			 HOST1X_CHANNEL_DMAEND);
+
+	/* set GET, by loading the value in PUT (then reset GET) */
+	host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
+			 HOST1X_CHANNEL_DMACTRL_DMAGETRST |
+			 HOST1X_CHANNEL_DMACTRL_DMAINITGET,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	dev_dbg(host1x->dev,
+		"%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__,
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	/* deassert GET reset and set PUT */
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
+
+	/* start the command DMA */
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL);
+
+	cdma->running = true;
+}
+
+/*
+ * Kick channel DMA into action by writing its PUT offset (if it has changed)
+ */
+static void cdma_flush(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	if (cdma->push_buffer.pos != cdma->last_pos) {
+		host1x_ch_writel(ch, cdma->push_buffer.pos,
+				 HOST1X_CHANNEL_DMAPUT);
+		cdma->last_pos = cdma->push_buffer.pos;
+	}
+}
+
+static void cdma_stop(struct host1x_cdma *cdma)
+{
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+
+	mutex_lock(&cdma->lock);
+	if (cdma->running) {
+		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
+		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+				 HOST1X_CHANNEL_DMACTRL);
+		cdma->running = false;
+	}
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Stops both channel's command processor and CDMA immediately.
+ * Also, tears down the channel and resets corresponding module.
+ */
+static void cdma_freeze(struct host1x_cdma *cdma)
+{
+	struct host1x *host = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u32 cmdproc_stop;
+
+	if (cdma->torndown && !cdma->running) {
+		dev_warn(host->dev, "Already torn down\n");
+		return;
+	}
+
+	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
+
+	cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop |= BIT(ch->id);
+	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
+		host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT),
+		cdma->last_pos);
+
+	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
+			 HOST1X_CHANNEL_DMACTRL);
+
+	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+
+	cdma->running = false;
+	cdma->torndown = true;
+}
+
+static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
+{
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u32 cmdproc_stop;
+
+	dev_dbg(host1x->dev,
+		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
+		ch->id, getptr);
+
+	cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop &= ~(BIT(ch->id));
+	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	cdma->torndown = false;
+	cdma_timeout_restart(cdma, getptr);
+}
+
+/*
+ * If this timeout fires, it indicates the current sync_queue entry has
+ * exceeded its TTL and the userctx should be timed out and remaining
+ * submits already issued cleaned up (future submits return an error).
+ */
+static void cdma_timeout_handler(struct work_struct *work)
+{
+	struct host1x_cdma *cdma;
+	struct host1x *host1x;
+	struct host1x_channel *ch;
+
+	u32 syncpt_val;
+
+	u32 prev_cmdproc, cmdproc_stop;
+
+	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
+			    timeout.wq);
+	host1x = cdma_to_host1x(cdma);
+	ch = cdma_to_channel(cdma);
+
+	host1x_debug_dump(cdma_to_host1x(cdma));
+
+	mutex_lock(&cdma->lock);
+
+	if (!cdma->timeout.client) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but has no clientid\n");
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	/* stop processing to get a clean snapshot */
+	prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
+	cmdproc_stop = prev_cmdproc | BIT(ch->id);
+	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+
+	dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
+		prev_cmdproc, cmdproc_stop);
+
+	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
+
+	/* has buffer actually completed? */
+	if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) {
+		dev_dbg(host1x->dev,
+			"cdma_timeout: expired, but buffer had completed\n");
+		/* restore */
+		cmdproc_stop = prev_cmdproc & ~(BIT(ch->id));
+		host1x_sync_writel(host1x, cmdproc_stop,
+				   HOST1X_SYNC_CMDPROC_STOP);
+		mutex_unlock(&cdma->lock);
+		return;
+	}
+
+	dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
+		__func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
+		syncpt_val, cdma->timeout.syncpt_val);
+
+	/* stop HW, resetting channel/module */
+	host1x_hw_cdma_freeze(host1x, cdma);
+
+	host1x_cdma_update_sync_queue(cdma, ch->dev);
+	mutex_unlock(&cdma->lock);
+}
+
+/*
+ * Init timeout resources
+ */
+static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id)
+{
+	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
+	cdma->timeout.initialized = true;
+
+	return 0;
+}
+
+/*
+ * Clean up timeout resources
+ */
+static void cdma_timeout_destroy(struct host1x_cdma *cdma)
+{
+	if (cdma->timeout.initialized)
+		cancel_delayed_work(&cdma->timeout.wq);
+	cdma->timeout.initialized = false;
+}
+
+static const struct host1x_cdma_ops host1x_cdma_ops = {
+	.start = cdma_start,
+	.stop = cdma_stop,
+	.flush = cdma_flush,
+
+	.timeout_init = cdma_timeout_init,
+	.timeout_destroy = cdma_timeout_destroy,
+	.freeze = cdma_freeze,
+	.resume = cdma_resume,
+	.timeout_cpu_incr = cdma_timeout_cpu_incr,
+};
+
+static const struct host1x_pushbuffer_ops host1x_pushbuffer_ops = {
+	.init = push_buffer_init,
+};
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
new file mode 100644
index 000000000000..ee199623e365
--- /dev/null
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -0,0 +1,168 @@
+/*
+ * Tegra host1x Channel
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/slab.h>
+#include <trace/events/host1x.h>
+
+#include "host1x.h"
+#include "host1x_bo.h"
+#include "channel.h"
+#include "dev.h"
+#include "intr.h"
+#include "job.h"
+
+#define HOST1X_CHANNEL_SIZE 16384
+#define TRACE_MAX_LENGTH 128U
+
+static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
+			       u32 offset, u32 words)
+{
+	void *vaddr;
+	u32 done;
+
+	/* gather tracing is optional, gated by host1x_debug_trace_cmdbuf */
+	if (!host1x_debug_trace_cmdbuf)
+		return;
+
+	vaddr = host1x_bo_mmap(bo);
+	if (!vaddr)
+		return;
+
+	/* ftrace output seems limited: write TRACE_MAX_LENGTH words at a time */
+	for (done = 0; done < words; done += TRACE_MAX_LENGTH)
+		trace_host1x_cdma_push_gather(
+			dev_name(cdma_to_channel(cdma)->dev),
+			(u32)bo, min(words - done, TRACE_MAX_LENGTH),
+			offset + done * sizeof(u32), vaddr);
+
+	host1x_bo_munmap(bo, vaddr);
+}
+
+static void submit_gathers(struct host1x_job *job)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+	unsigned int idx;
+
+	for (idx = 0; idx < job->num_gathers; idx++) {
+		struct host1x_job_gather *g = job->gathers + idx;
+		u32 opcode = host1x_opcode_gather(g->words);
+		/* trace first, then push the opcode with the gather address */
+		trace_write_gather(cdma, g->bo, g->offset, opcode & 0xffff);
+		host1x_cdma_push(cdma, opcode, g->base + g->offset);
+	}
+}
+
+static int channel_submit(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x_syncpt *sp;
+	u32 user_syncpt_incrs = job->syncpt_incrs;
+	u32 prev_max = 0;
+	u32 syncval;
+	int err;
+	struct host1x_waitlist *completed_waiter = NULL;
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+
+	sp = host->syncpt + job->syncpt_id;
+	trace_host1x_channel_submit(dev_name(ch->dev),
+				    job->num_gathers, job->num_relocs,
+				    job->num_waitchk, job->syncpt_id,
+				    job->syncpt_incrs);
+
+	/* before error checks, store the current max in job->syncpt_end */
+	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+
+	/* get submit lock; every exit taken below drops it again */
+	err = mutex_lock_interruptible(&ch->submitlock);
+	if (err)
+		goto error;
+
+	completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
+	if (!completed_waiter) {
+		mutex_unlock(&ch->submitlock);
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* begin a CDMA submit */
+	err = host1x_cdma_begin(&ch->cdma, job);
+	if (err) {
+		mutex_unlock(&ch->submitlock);
+		goto error;
+	}
+
+	if (job->serialize) {
+		/*
+		 * Force serialization by inserting a host wait for the
+		 * previous job to finish before this one can commence.
+		 */
+		host1x_cdma_push(&ch->cdma,
+				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+					host1x_uclass_wait_syncpt_r(), 1),
+				 host1x_class_host_wait_syncpt(job->syncpt_id,
+					host1x_syncpt_read_max(sp)));
+	}
+
+	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
+
+	job->syncpt_end = syncval;
+
+	/* add a setclass for modules that require it */
+	if (job->class)
+		host1x_cdma_push(&ch->cdma,
+				 host1x_opcode_setclass(job->class, 0, 0),
+				 HOST1X_OPCODE_NOP);
+
+	submit_gathers(job);
+
+	/* end CDMA submit & stash pinned hMems into sync queue */
+	host1x_cdma_end(&ch->cdma, job);
+
+	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
+
+	/* schedule a submit complete interrupt */
+	err = host1x_intr_add_action(host, job->syncpt_id, syncval,
+				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
+				     completed_waiter, NULL);
+	completed_waiter = NULL; /* handed to host1x_intr_add_action() */
+	WARN(err, "Failed to set submit complete interrupt");
+
+	mutex_unlock(&ch->submitlock);
+
+	return 0; /* an add_action failure is only WARNed, not returned */
+
+error: /* reached with submitlock already released or never taken */
+	kfree(completed_waiter);
+	return err;
+}
+
+static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
+			       unsigned int index)
+{
+	/* register windows are HOST1X_CHANNEL_SIZE apart within dev->regs */
+	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+	ch->id = index;
+	mutex_init(&ch->submitlock);
+	mutex_init(&ch->reflock);
+	return 0;
+}
+
+static const struct host1x_channel_ops host1x_channel_ops = {
+	.init = host1x_channel_init,
+	.submit = channel_submit,
+}; /* installed as host->channel_op by host1x01_init() */
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
new file mode 100644
index 000000000000..334c038052f5
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+
+#include <linux/io.h>
+
+#include "dev.h"
+#include "debug.h"
+#include "cdma.h"
+#include "channel.h"
+#include "host1x_bo.h"
+
+#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
+
+enum { /* opcode field: bits 31:28 of a command word (val >> 28) */
+	HOST1X_OPCODE_SETCLASS	= 0x00,
+	HOST1X_OPCODE_INCR	= 0x01,
+	HOST1X_OPCODE_NONINCR	= 0x02,
+	HOST1X_OPCODE_MASK	= 0x03,
+	HOST1X_OPCODE_IMM	= 0x04,
+	HOST1X_OPCODE_RESTART	= 0x05,
+	HOST1X_OPCODE_GATHER	= 0x06,
+	HOST1X_OPCODE_EXTEND	= 0x0e,
+};
+
+enum { /* EXTEND sub-opcode: bits 27:24 of the command word */
+	HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK	= 0x00,
+	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
+};
+
+static unsigned int show_channel_command(struct output *o, u32 val)
+{
+	/* Fields shared by most opcodes; not every opcode uses them. */
+	unsigned int offset = val >> 16 & 0xfff;
+	unsigned int bits, ext;
+
+	/* Print the opcode word; return how many data words belong to it. */
+	switch (val >> 28) {
+	case HOST1X_OPCODE_SETCLASS:
+		bits = val & 0x3f;
+		if (!bits) {
+			host1x_debug_output(o, "SETCL(class=%03x)\n",
+					    val >> 6 & 0x3ff);
+			return 0;
+		}
+		host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+				    val >> 6 & 0x3ff, offset, bits);
+		return hweight8(bits);
+
+	case HOST1X_OPCODE_INCR:
+		host1x_debug_output(o, "INCR(offset=%03x, [", offset);
+		return val & 0xffff;
+
+	case HOST1X_OPCODE_NONINCR:
+		host1x_debug_output(o, "NONINCR(offset=%03x, [", offset);
+		return val & 0xffff;
+
+	case HOST1X_OPCODE_MASK:
+		bits = val & 0xffff;
+		host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+				    offset, bits);
+		return hweight16(bits);
+
+	case HOST1X_OPCODE_IMM:
+		host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+				    offset, val & 0xffff);
+		return 0;
+
+	case HOST1X_OPCODE_RESTART:
+		host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+		return 0;
+
+	case HOST1X_OPCODE_GATHER:
+		/* the single data word is the gather's address */
+		host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+				    offset, val >> 15 & 0x1,
+				    val >> 14 & 0x1, val & 0x3fff);
+		return 1;
+
+	case HOST1X_OPCODE_EXTEND:
+		ext = val >> 24 & 0xf;
+		if (ext == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
+			host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else if (ext == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
+			host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+					    val & 0xff);
+		else
+			host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+		return 0;
+
+	default:
+		/* opcode values not listed above are not decoded */
+		return 0;
+	}
+}
+
+static void show_gather(struct output *o, phys_addr_t phys_addr,
+			unsigned int words, struct host1x_cdma *cdma,
+			phys_addr_t pin_addr, u32 *map_addr)
+{
+	/* Decode a gather word by word; offset locates it inside map_addr */
+	u32 offset = phys_addr - pin_addr;
+	unsigned int data_count = 0, i;
+
+	/*
+	 * Sometimes we're given a different hardware address for the same
+	 * page; the offset is then invalid and we just have to bail out.
+	 */
+	if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) {
+		host1x_debug_output(o, "[address mismatch]\n");
+		return;
+	}
+
+	for (i = 0; i < words; i++) {
+		u32 addr = phys_addr + i * 4;
+		u32 val = *(map_addr + offset / 4 + i);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: %08x:", addr, val);
+			data_count = show_channel_command(o, val);
+		} else {
+			/* only the last operand (data_count == 1) closes "[" */
+			host1x_debug_output(o, "%08x%s", val,
+					    data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+	}
+}
+
+static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
+{
+	struct host1x_job *job;
+
+	/* walk every job on cdma->sync_queue and dump each of its gathers */
+	list_for_each_entry(job, &cdma->sync_queue, list) {
+		int i;
+		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
+				    job, job->syncpt_id, job->syncpt_end,
+				    job->first_get, job->timeout,
+				    job->num_slots, job->num_unpins);
+
+		for (i = 0; i < job->num_gathers; i++) {
+			struct host1x_job_gather *g = job->gathers + i;
+			u32 *vaddr = (u32 *)job->gather_copy_mapped;
+
+			/* no gather_copy_mapped: map the gather's own bo */
+			if (!vaddr)
+				vaddr = host1x_bo_mmap(g->bo);
+
+			if (!vaddr) {
+				host1x_debug_output(o, "[could not mmap]\n");
+				continue;
+			}
+
+			host1x_debug_output(o, "    GATHER at %08x+%04x, %d words\n",
+					    g->base, g->offset, g->words);
+
+			show_gather(o, g->base + g->offset, g->words, cdma,
+				    g->base, vaddr);
+
+			if (!job->gather_copy_mapped)
+				host1x_bo_munmap(g->bo, vaddr);
+		}
+	}
+}
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 dmaput, dmaget, dmactrl;
+	u32 cbstat, cbread;
+	u32 cbclass, cboffset;
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
+	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
+
+	host1x_debug_output(o, "%d-%s: ", ch->id, dev_name(ch->dev));
+
+	/* DMASTOP set or push buffer not mapped: report inactive and stop */
+	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
+	    !cdma->push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	cbclass = HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat);
+	cboffset = HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat);
+
+	if (cbclass == HOST1X_CLASS_HOST1X &&
+	    cboffset == HOST1X_UCLASS_WAIT_SYNCPT) {
+		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
+				    cbread >> 24, cbread & 0xffffff);
+	} else if (cbclass == HOST1X_CLASS_HOST1X &&
+		   cboffset == HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
+		u32 base = (cbread >> 16) & 0xff;
+		u32 rel = cbread & 0xffff;
+		u32 baseval = host1x_sync_readl(host,
+						HOST1X_SYNC_SYNCPT_BASE(base));
+
+		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
+				    cbread >> 24, baseval + rel, base,
+				    baseval, rel);
+	} else {
+		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
+				    cbclass, cboffset, cbread);
+	}
+
+	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	/* Dump the words between the command FIFO's read and write pointers. */
+	u32 val, rd_ptr, wr_ptr, start, end;
+	unsigned int data_count = 0; /* operand words still expected */
+
+	host1x_debug_output(o, "%d: fifo:\n", ch->id);
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
+			   HOST1X_SYNC_CFPEEK_CTRL);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
+	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
+	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
+	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
+	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
+
+	do {
+		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
+				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
+				   HOST1X_SYNC_CFPEEK_CTRL);
+		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
+		/* an opcode word, then its operands; the last one closes "[" */
+		if (!data_count) {
+			host1x_debug_output(o, "%08x:", val);
+			data_count = show_channel_command(o, val);
+		} else {
+			host1x_debug_output(o, "%08x%s", val,
+					    data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+	/* the FIFO contents ended before the last command's operands did */
+	if (data_count)
+		host1x_debug_output(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	int i;
+
+	host1x_debug_output(o, "---- mlocks ----\n"); /* one line per mlock */
+	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+		u32 owner = host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
+		/* the owning channel's id is in bits 11:8 of the register */
+		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
+			host1x_debug_output(o, "%d: locked by channel %d\n",
+				i, (owner >> 8) & 0xf);
+		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
+			host1x_debug_output(o, "%d: locked by cpu\n", i);
+		else
+			host1x_debug_output(o, "%d: unlocked\n", i);
+	}
+	host1x_debug_output(o, "\n");
+}
+
+static const struct host1x_debug_ops host1x_debug_ops = {
+	.show_channel_cdma = host1x_debug_show_channel_cdma,
+	.show_channel_fifo = host1x_debug_show_channel_fifo,
+	.show_mlocks = host1x_debug_show_mlocks,
+}; /* installed as host->debug_op by host1x01_init() */
diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c
new file mode 100644
index 000000000000..a14e91cd1e58
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01.c
@@ -0,0 +1,42 @@
+/*
+ * Host1x init for T20 and T30 Architecture Chips
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* include hw specification */
+#include "hw/host1x01.h"
+#include "hw/host1x01_hardware.h"
+
+/* include code */
+#include "hw/cdma_hw.c"
+#include "hw/channel_hw.c"
+#include "hw/debug_hw.c"
+#include "hw/intr_hw.c"
+#include "hw/syncpt_hw.c"
+
+#include "dev.h"
+
+int host1x01_init(struct host1x *host)
+{
+	/* wire up the ops tables defined by the hw/ sources included above */
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->channel_op = &host1x_channel_ops;
+	host->debug_op = &host1x_debug_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x01.h b/drivers/gpu/host1x/hw/host1x01.h
new file mode 100644
index 000000000000..2706b6743250
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01.h
@@ -0,0 +1,25 @@
+/*
+ * Host1x init for T20 and T30 Architecture Chips
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HOST1X_HOST1X01_H
+#define HOST1X_HOST1X01_H
+
+struct host1x;
+
+int host1x01_init(struct host1x *host);
+
+#endif /* HOST1X_HOST1X01_H */
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
new file mode 100644
index 000000000000..5f0fb866efa8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -0,0 +1,143 @@
+/*
+ * Tegra host1x Register Offsets for Tegra20 and Tegra30
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_HOST1X01_HARDWARE_H
+#define __HOST1X_HOST1X01_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x01_channel.h"
+#include "hw_host1x01_sync.h"
+#include "hw_host1x01_uclass.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v(); /* review: _v, not _f? */
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+
+/* cdma opcodes: the opcode number occupies bits 31:28 of the word */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4); /* low 4 address bits are dropped */
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count; /* arguments are not masked to field width */
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) /* zero-count NONINCR */
+
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
new file mode 100644
index 000000000000..b4bc7ca4e051
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x_channel_host1x_h__
+#define __hw_host1x_channel_host1x_h__
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 10) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
new file mode 100644
index 000000000000..ac704e579977
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x01_sync_h__
+#define __hw_host1x01_sync_h__
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0x400 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0x40 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0x60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0x68 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0x80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x1ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x1ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_f(u32 v)
+{
+	return (v & 0xf) << 8; /* packs v; to read CHID use (r >> 8) & 0xf */
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_F(v) \
+	host1x_sync_mlock_owner_chid_f(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x500 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0x700 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0x720 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x1ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0x7) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x1ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x1ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0x758 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif /* __hw_host1x01_sync_h__ */
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
new file mode 100644
index 000000000000..42f3ce19ca32
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef __hw_host1x_uclass_host1x_h__
+#define __hw_host1x_uclass_host1x_h__
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1; /* unshifted value of constant READ for field RWN */
+}
+#define HOST1X_UCLASS_INDOFF_RWN_READ_V \
+	host1x_uclass_indoff_rwn_read_v()
+#endif
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
new file mode 100644
index 000000000000..b592eef1efcb
--- /dev/null
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -0,0 +1,143 @@
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <asm/mach/irq.h>
+
+#include "intr.h"
+#include "dev.h"
+
+/*
+ * Sync point threshold interrupt service function (interrupt context): write
+ * the syncpoint's bit to INT_DISABLE and INT_STATUS, then queue its work item.
+ */
+static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+{
+	struct host1x *host = syncpt->host;
+	u32 word = syncpt->id / 32;	/* 32 syncpoints per register word */
+	u32 bit = BIT(syncpt->id % 32);
+
+	host1x_sync_writel(host, bit,
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(word));
+	host1x_sync_writel(host, bit,
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(word));
+	queue_work(host->intr_wq, &syncpt->intr.work);
+}
+
+/*
+ * Syncpoint threshold IRQ handler: reads each 32-bit CPU0_INT_STATUS word
+ * covering nb_pts syncpoints and services every syncpoint whose bit is set.
+ */
+static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
+{
+	struct host1x *host = dev_id;
+	unsigned long reg;
+	unsigned int i, id;
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+		reg = host1x_sync_readl(host,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+		for_each_set_bit(id, &reg, 32)
+			host1x_intr_syncpt_handle(host->syncpt + i * 32 + id);
+	}
+	return IRQ_HANDLED;
+}
+
+static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+{
+	u32 i;	/* index of a register word covering 32 syncpoints */
+
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
+		host1x_sync_writel(host, 0xffffffffu,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+	}
+}
+
+/* Disable syncpt irqs, init work items, request the IRQ, program timeouts. */
+static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
+	void (*syncpt_thresh_work)(struct work_struct *))
+{
+	int i, err;
+
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
+
+	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
+			       syncpt_thresh_isr, IRQF_SHARED,
+			       "host1x_syncpt", host);
+	/* devm_request_irq() returns 0 or a negative errno */
+	if (WARN_ON(err < 0))
+		return err;
+
+	/* disable the ip_busy_timeout. this prevents write drops */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
+
+	/*
+	 * increase the auto-ack timeout to the maximum value. 2d will hang
+	 * otherwise on Tegra2.
+	 */
+	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
+
+	/* update host clocks per usec */
+	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+
+	return 0;
+}
+
+static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+	u32 id, u32 thresh)
+{
+	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
+}
+
+static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
+{
+	host1x_sync_writel(host, BIT(id % 32),	/* 32 syncpoints per word */
+		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
+}
+
+static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
+{
+	host1x_sync_writel(host, BIT(id % 32),	/* 32 syncpoints per word */
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
+}
+
+static int _host1x_free_syncpt_irq(struct host1x *host)
+{
+	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
+	flush_workqueue(host->intr_wq);
+	return 0;
+}
+
+static const struct host1x_intr_ops host1x_intr_ops = {
+	.init_host_sync = _host1x_intr_init_host_sync,
+	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
+	.free_syncpt_irq = _host1x_free_syncpt_irq,
+};
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
new file mode 100644
index 000000000000..61174990102a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -0,0 +1,114 @@
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/io.h>
+
+#include "dev.h"
+#include "syncpt.h"
+
+/*
+ * Write the value from host1x_syncpt_read_min() back to the hw syncpoint.
+ */
+static void syncpt_restore(struct host1x_syncpt *sp)
+{
+	u32 min = host1x_syncpt_read_min(sp);
+
+	host1x_sync_writel(sp->host, min, HOST1X_SYNC_SYNCPT(sp->id));
+}
+
+/*
+ * Store the saved wait base (sp->base_val) into the syncpoint's hw register.
+ */
+static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
+{
+	u32 reg = HOST1X_SYNC_SYNCPT_BASE(sp->id);
+
+	host1x_sync_writel(sp->host, sp->base_val, reg);
+}
+
+/*
+ * Read waitbase value from hw.
+ */
+static void syncpt_read_wait_base(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	sp->base_val =
+		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
+}
+
+/*
+ * Read the live syncpoint value from hw, store it in min_val and return it.
+ */
+static u32 syncpt_load(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 old, live;
+
+	/* Loop until min_val is swapped from 'old' to 'live' without a race */
+	do {
+		old = host1x_syncpt_read_min(sp);
+		live = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT(sp->id));
+	} while ((u32)atomic_cmpxchg(&sp->min_val, old, live) != old);
+
+	if (!host1x_syncpt_check_max(sp, live))
+		dev_err(host->dev, "%s failed: id=%u, min=%d, max=%d\n",
+			__func__, sp->id, host1x_syncpt_read_min(sp),
+			host1x_syncpt_read_max(sp));
+
+	return live;
+}
+
+/*
+ * Write a cpu syncpoint increment to the hardware, without touching
+ * the cache. Refused (with a debug dump) if not client managed and idle.
+ */
+static void syncpt_cpu_incr(struct host1x_syncpt *sp)
+{
+	struct host1x *host = sp->host;
+	u32 reg_offset = sp->id / 32;
+
+	if (!host1x_syncpt_client_managed(sp) && host1x_syncpt_idle(sp)) {
+		dev_err(host->dev, "Trying to increment syncpoint id %u beyond max\n",
+			sp->id);
+		host1x_debug_dump(host);
+		return;
+	}
+	/* bit index must use the same 32-per-word split as reg_offset */
+	host1x_sync_writel(host, BIT(sp->id % 32),
+			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
+	wmb();
+}
+
+/* remove a wait pointed to by patch_addr */
+static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
+{
+	u32 override = host1x_class_host_wait_syncpt(
+		HOST1X_SYNCPT_RESERVED, 0);
+
+	*((u32 *)patch_addr) = override;
+	return 0;
+}
+
+static const struct host1x_syncpt_ops host1x_syncpt_ops = {
+	.restore = syncpt_restore,
+	.restore_wait_base = syncpt_restore_wait_base,
+	.load_wait_base = syncpt_read_wait_base,
+	.load = syncpt_load,
+	.cpu_incr = syncpt_cpu_incr,
+	.patch_wait = syncpt_patch_wait,
+};
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
new file mode 100644
index 000000000000..2491bf82e30c
--- /dev/null
+++ b/drivers/gpu/host1x/intr.c
@@ -0,0 +1,354 @@
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+
+#include <trace/events/host1x.h>
+#include "channel.h"
+#include "dev.h"
+#include "intr.h"
+
+/* Wait list management */
+
+enum waitlist_state {
+	WLS_PENDING,
+	WLS_REMOVED,
+	WLS_CANCELLED,
+	WLS_HANDLED
+};
+
+static void waiter_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct host1x_waitlist, refcount));
+}
+
+/*
+ * Insert @waiter into @queue, which is kept sorted by threshold using a
+ * wrap-safe signed comparison. Returns true if it became the new head.
+ */
+static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
+				struct list_head *queue)
+{
+	struct list_head *after = queue;
+	struct host1x_waitlist *cur;
+
+	/* walk from the tail to the last entry not above the new threshold */
+	list_for_each_entry_reverse(cur, queue, list)
+		if ((s32)(cur->thresh - waiter->thresh) <= 0) {
+			after = &cur->list;
+			break;
+		}
+	list_add(&waiter->list, after);
+	return after == queue;
+}
+
+/*
+ * run through a waiter queue for a single sync point ID
+ * and gather all completed waiters into lists by actions
+ */
+static void remove_completed_waiters(struct list_head *head, u32 sync,
+			struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *dest;
+	struct host1x_waitlist *waiter, *next, *prev;
+
+	list_for_each_entry_safe(waiter, next, head, list) {
+		if ((s32)(waiter->thresh - sync) > 0)
+			break;
+
+		dest = completed + waiter->action;
+
+		/* consolidate submit cleanups */
+		if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
+		    !list_empty(dest)) {
+			prev = list_entry(dest->prev,
+					  struct host1x_waitlist, list);
+			if (prev->data == waiter->data) {
+				prev->count++;
+				dest = NULL;
+			}
+		}
+
+		/* PENDING->REMOVED or CANCELLED->HANDLED */
+		if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
+			list_del(&waiter->list);
+			kref_put(&waiter->refcount, waiter_release);
+		} else
+			list_move_tail(&waiter->list, dest);
+	}
+}
+
+static void reset_threshold_interrupt(struct host1x *host,
+				      struct list_head *head,
+				      unsigned int id)
+{
+	u32 thresh =
+		list_first_entry(head, struct host1x_waitlist, list)->thresh;
+
+	host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+	host1x_hw_intr_enable_syncpt_intr(host, id);
+}
+
+/* Handler run for completed HOST1X_INTR_ACTION_SUBMIT_COMPLETE waiters. */
+static void action_submit_complete(struct host1x_waitlist *waiter)
+{
+	struct host1x_channel *ch = waiter->data;
+
+	host1x_cdma_update(&ch->cdma);
+
+	/* Add nr_completed to trace */
+	trace_host1x_channel_submit_complete(dev_name(ch->dev),
+					     waiter->count, waiter->thresh);
+}
+
+static void action_wakeup(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+	wake_up(wq);
+}
+
+static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
+{
+	wait_queue_head_t *wq = waiter->data;
+	wake_up_interruptible(wq);
+}
+
+typedef void (*action_handler)(struct host1x_waitlist *waiter);
+/* Handler table indexed by enum host1x_intr_action. */
+static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
+	[HOST1X_INTR_ACTION_SUBMIT_COMPLETE] = action_submit_complete,
+	[HOST1X_INTR_ACTION_WAKEUP] = action_wakeup,
+	[HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE] = action_wakeup_interruptible,
+};
+
+static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+{
+	struct list_head *head = completed;
+	int i;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
+		action_handler handler = action_handlers[i];
+		struct host1x_waitlist *waiter, *next;
+
+		list_for_each_entry_safe(waiter, next, head, list) {
+			list_del(&waiter->list);
+			handler(waiter);
+			WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
+				WLS_REMOVED);
+			kref_put(&waiter->refcount, waiter_release);
+		}
+	}
+}
+
+/*
+ * Remove & handle all waiters that have completed for the given syncpt
+ */
+static int process_wait_list(struct host1x *host,
+			     struct host1x_syncpt *syncpt,
+			     u32 threshold)
+{
+	struct list_head completed[HOST1X_INTR_ACTION_COUNT];
+	unsigned int i;
+	int empty;
+
+	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
+		INIT_LIST_HEAD(completed + i);
+
+	spin_lock(&syncpt->intr.lock);
+
+	remove_completed_waiters(&syncpt->intr.wait_head, threshold,
+				 completed);
+
+	empty = list_empty(&syncpt->intr.wait_head);
+	if (empty)
+		host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
+	else
+		reset_threshold_interrupt(host, &syncpt->intr.wait_head,
+					  syncpt->id);
+
+	spin_unlock(&syncpt->intr.lock);
+
+	run_handlers(completed);
+
+	return empty;
+}
+
+/*
+ * Sync point threshold interrupt service thread function
+ * Handles sync point threshold triggers, in thread context
+ */
+
+static void syncpt_thresh_work(struct work_struct *work)
+{
+	struct host1x_syncpt_intr *syncpt_intr =
+		container_of(work, struct host1x_syncpt_intr, work);
+	struct host1x_syncpt *syncpt =
+		container_of(syncpt_intr, struct host1x_syncpt, intr);
+	unsigned int id = syncpt->id;
+	struct host1x *host = syncpt->host;
+
+	(void)process_wait_list(host, syncpt,
+				host1x_syncpt_load(host->syncpt + id));
+}
+
+int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+			   enum host1x_intr_action action, void *data,
+			   struct host1x_waitlist *waiter, void **ref)
+{
+	struct host1x_syncpt *syncpt;
+	int queue_was_empty;
+
+	if (waiter == NULL) {
+		pr_warn("%s: NULL waiter\n", __func__);
+		return -EINVAL;
+	}
+
+	/* initialize a new waiter */
+	INIT_LIST_HEAD(&waiter->list);
+	kref_init(&waiter->refcount);
+	if (ref)
+		kref_get(&waiter->refcount);
+	waiter->thresh = thresh;
+	waiter->action = action;
+	atomic_set(&waiter->state, WLS_PENDING);
+	waiter->data = data;
+	waiter->count = 1;
+
+	syncpt = host->syncpt + id;
+
+	spin_lock(&syncpt->intr.lock);
+
+	queue_was_empty = list_empty(&syncpt->intr.wait_head);
+
+	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
+		/* added at head of list - new threshold value */
+		host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+
+		/* added as first waiter - enable interrupt */
+		if (queue_was_empty)
+			host1x_hw_intr_enable_syncpt_intr(host, id);
+	}
+
+	spin_unlock(&syncpt->intr.lock);
+
+	if (ref)
+		*ref = waiter;
+	return 0;
+}
+
+void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref)
+{
+	struct host1x_waitlist *waiter = ref;
+	struct host1x_syncpt *syncpt;
+
+	while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
+	       WLS_REMOVED)
+		schedule();
+
+	syncpt = host->syncpt + id;
+	(void)process_wait_list(host, syncpt,
+				host1x_syncpt_load(host->syncpt + id));
+
+	kref_put(&waiter->refcount, waiter_release);
+}
+
+/* Set up the waiter bookkeeping of every syncpoint, then start interrupts. */
+int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
+{
+	unsigned int id;
+	u32 nb_pts = host1x_syncpt_nb_pts(host);
+
+	mutex_init(&host->intr_mutex);
+	host->intr_syncpt_irq = irq_sync;
+	host->intr_wq = create_workqueue("host_syncpt");
+	if (!host->intr_wq)
+		return -ENOMEM;
+
+	for (id = 0; id < nb_pts; ++id) {
+		struct host1x_syncpt *syncpt = host->syncpt + id;
+
+		spin_lock_init(&syncpt->intr.lock);
+		INIT_LIST_HEAD(&syncpt->intr.wait_head);
+		snprintf(syncpt->intr.thresh_irq_name,
+			 sizeof(syncpt->intr.thresh_irq_name),
+			 "host1x_sp_%02u", id);	/* id is unsigned */
+	}
+
+	host1x_intr_start(host);
+	return 0;
+}
+
+void host1x_intr_deinit(struct host1x *host)
+{
+	host1x_intr_stop(host);
+	destroy_workqueue(host->intr_wq);
+}
+
+/*
+ * Enable syncpoint interrupts: hand the host clock rate, as clocks per
+ * microsecond rounded up, to the hw init hook while holding intr_mutex.
+ */
+void host1x_intr_start(struct host1x *host)
+{
+	u32 hz = clk_get_rate(host->clk);
+
+	mutex_lock(&host->intr_mutex);
+	/* a failure cannot be reported from here; the result is dropped */
+	(void)host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
+					    syncpt_thresh_work);
+	mutex_unlock(&host->intr_mutex);
+}
+
+void host1x_intr_stop(struct host1x *host)
+{
+	unsigned int id;
+	struct host1x_syncpt *syncpt = host->syncpt;
+	u32 nb_pts = host1x_syncpt_nb_pts(host);
+
+	mutex_lock(&host->intr_mutex);
+
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
+
+	for (id = 0; id < nb_pts; ++id) {
+		struct host1x_waitlist *waiter, *next;
+
+		list_for_each_entry_safe(waiter, next,
+			&syncpt[id].intr.wait_head, list) {
+			if (atomic_cmpxchg(&waiter->state,
+			    WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
+				list_del(&waiter->list);
+				kref_put(&waiter->refcount, waiter_release);
+			}
+		}
+
+		if (!list_empty(&syncpt[id].intr.wait_head)) {
+			/* output diagnostics */
+			mutex_unlock(&host->intr_mutex);
+			pr_warn("%s cannot stop syncpt intr id=%d\n",
+				__func__, id);
+			return;
+		}
+	}
+
+	host1x_hw_intr_free_syncpt_irq(host);
+
+	mutex_unlock(&host->intr_mutex);
+}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
new file mode 100644
index 000000000000..2b8adf016a05
--- /dev/null
+++ b/drivers/gpu/host1x/intr.h
@@ -0,0 +1,102 @@
+/*
+ * Tegra host1x Interrupt Management
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_INTR_H
+#define __HOST1X_INTR_H
+
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+
+struct host1x;
+
+enum host1x_intr_action {
+	/*
+	 * Perform cleanup after a submit has completed.
+	 * 'data' points to a channel
+	 */
+	HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
+
+	/*
+	 * Wake up a task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP,
+
+	/*
+	 * Wake up an interruptible task.
+	 * 'data' points to a wait_queue_head_t
+	 */
+	HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+
+	HOST1X_INTR_ACTION_COUNT
+};
+
+struct host1x_syncpt_intr {
+	spinlock_t lock;
+	struct list_head wait_head;
+	char thresh_irq_name[16];	/* "host1x_sp_NN" + NUL needs 13 bytes */
+	struct work_struct work;
+};
+
+struct host1x_waitlist {
+	struct list_head list;
+	struct kref refcount;
+	u32 thresh;
+	enum host1x_intr_action action;
+	atomic_t state;
+	void *data;
+	int count;
+};
+
+/*
+ * Schedule an action to be taken when a sync point reaches the given threshold.
+ *
+ * @id the sync point
+ * @thresh the threshold
+ * @action the action to take
+ * @data a pointer to extra data depending on action, see above
+ * @waiter waiter structure - assumes ownership
+ * @ref must be passed if cancellation is possible, else NULL
+ *
+ * This is a non-blocking api.
+ */
+int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+	enum host1x_intr_action action, void *data,
+	struct host1x_waitlist *waiter, void **ref);
+
+/*
+ * Unreference an action submitted to host1x_intr_add_action().
+ * You must call this if you passed non-NULL as ref.
+ * @ref the ref returned from host1x_intr_add_action()
+ */
+void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref);
+
+/* Initialize host1x sync point interrupt */
+int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+
+/* Deinitialize host1x sync point interrupt */
+void host1x_intr_deinit(struct host1x *host);
+
+/* Enable host1x sync point interrupt */
+void host1x_intr_start(struct host1x *host);
+
+/* Disable host1x sync point interrupt */
+void host1x_intr_stop(struct host1x *host);
+
+irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+#endif
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
new file mode 100644
index 000000000000..f665d679031c
--- /dev/null
+++ b/drivers/gpu/host1x/job.c
@@ -0,0 +1,603 @@
+/*
+ * Tegra host1x Job
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <trace/events/host1x.h>
+
+#include "channel.h"
+#include "dev.h"
+#include "host1x_bo.h"
+#include "job.h"
+#include "syncpt.h"
+
+/* Allocate a refcounted job sized for the given counts; NULL on failure. */
+struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs,
+				    u32 num_relocs, u32 num_waitchks)
+{
+	u64 num_unpins = (u64)num_cmdbufs + num_relocs;
+	struct host1x_job *job;
+	u64 total;
+	void *mem;
+
+	/* u64 arithmetic: 32-bit sums or products could wrap past the check */
+	total = sizeof(struct host1x_job) +
+		(u64)num_relocs * sizeof(struct host1x_reloc) +
+		num_unpins * sizeof(struct host1x_job_unpin_data) +
+		(u64)num_waitchks * sizeof(struct host1x_waitchk) +
+		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+		num_unpins * sizeof(dma_addr_t) +
+		num_unpins * sizeof(u32 *);
+	if (total > ULONG_MAX)
+		return NULL;
+
+	mem = job = kzalloc(total, GFP_KERNEL);
+	if (!job)
+		return NULL;
+
+	kref_init(&job->ref);
+	job->channel = ch;
+
+	/* Hand consecutive slices of the allocation to the arrays */
+	mem += sizeof(struct host1x_job);
+	job->relocarray = num_relocs ? mem : NULL;
+	mem += num_relocs * sizeof(struct host1x_reloc);
+	job->unpins = num_unpins ? mem : NULL;
+	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
+	job->waitchk = num_waitchks ? mem : NULL;
+	mem += num_waitchks * sizeof(struct host1x_waitchk);
+	job->gathers = num_cmdbufs ? mem : NULL;
+	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+	job->addr_phys = num_unpins ? mem : NULL;
+
+	job->reloc_addr_phys = job->addr_phys;
+	job->gather_addr_phys = &job->addr_phys[num_relocs];
+
+	return job;
+}
+
+struct host1x_job *host1x_job_get(struct host1x_job *job)
+{
+	kref_get(&job->ref);	/* take one more reference on the job */
+	return job;
+}
+
+/* kref release callback, run by host1x_job_put() on the last reference. */
+static void job_free(struct kref *ref)
+{
+	/* @ref is embedded in struct host1x_job, so recover the container */
+	kfree(container_of(ref, struct host1x_job, ref));
+}
+
+void host1x_job_put(struct host1x_job *job)
+{
+	kref_put(&job->ref, job_free);	/* job_free() runs on the last put */
+}
+
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
+			   u32 words, u32 offset)
+{
+	/* Append to the gather array; no bounds check is done here */
+	struct host1x_job_gather *slot = job->gathers + job->num_gathers++;
+
+	slot->words = words;
+	slot->bo = bo;
+	slot->offset = offset;
+}
+
+/*
+ * NULL an already satisfied WAIT_SYNCPT host method, by patching its
+ * args in the command stream. The method data is changed to reference
+ * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
+ * with a matching threshold value of 0, so is guaranteed to be popped
+ * by the host HW.
+ */
+static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
+				       struct host1x_bo *h, u32 offset)
+{
+	u32 page = offset >> PAGE_SHIFT;
+	void *vaddr = host1x_bo_kmap(h, page);
+
+	if (!vaddr) {
+		pr_err("Could not map cmdbuf for wait check\n");
+		return;
+	}
+	/* patch the wait at its byte offset inside the mapped page */
+	host1x_syncpt_patch_wait(sp, vaddr + (offset & ~PAGE_MASK));
+	host1x_bo_kunmap(h, page, vaddr);
+}
+
+/*
+ * Check driver supplied waitchk structs for syncpt thresholds
+ * that have already been satisfied and NULL the comparison (to
+ * avoid a wrap condition in the HW).
+ */
+static int do_waitchks(struct host1x_job *job, struct host1x *host,
+		       struct host1x_bo *patch)
+{
+	int i;
+
+	/* compare syncpt vs wait threshold */
+	for (i = 0; i < job->num_waitchk; i++) {
+		struct host1x_waitchk *wait = &job->waitchk[i];
+		struct host1x_syncpt *sp;
+
+		/* valid syncpt ids are 0 .. nb_pts - 1; ignore all others */
+		if (wait->syncpt_id >= host1x_syncpt_nb_pts(host))
+			continue;
+
+		/* skip all other gathers */
+		if (patch != wait->bo)
+			continue;
+
+		sp = host1x_syncpt_get(host, wait->syncpt_id);
+		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
+					       wait->syncpt_id, wait->thresh,
+					       host1x_syncpt_read_min(sp));
+
+		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
+			dev_dbg(host->dev,
+				"drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n",
+				wait->syncpt_id, sp->name, wait->thresh,
+				host1x_syncpt_read_min(sp));
+
+			host1x_syncpt_patch_offset(sp, patch, wait->offset);
+		}
+
+		wait->bo = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Take a reference on *@bo, pin it and append it to the job's unpin list.
+ * Returns 0 on success, -EINVAL if the reference or the pin failed.
+ */
+static int pin_bo(struct host1x_job *job, struct host1x_bo **bo)
+{
+	struct sg_table *sgt;
+	dma_addr_t phys_addr;
+
+	*bo = host1x_bo_get(*bo);
+	if (!*bo)
+		return -EINVAL;
+
+	phys_addr = host1x_bo_pin(*bo, &sgt);
+	if (!phys_addr) {
+		/* drop the reference again: no unpin entry will do it */
+		host1x_bo_put(*bo);
+		return -EINVAL;
+	}
+
+	job->addr_phys[job->num_unpins] = phys_addr;
+	job->unpins[job->num_unpins].bo = *bo;
+	job->unpins[job->num_unpins].sgt = sgt;
+	job->num_unpins++;
+	return 0;
+}
+
+/*
+ * Pin every reloc target, then every gather, so addr_phys[] lists relocs
+ * first. Returns the pin count, or 0 (everything unpinned) on failure.
+ */
+static unsigned int pin_job(struct host1x_job *job)
+{
+	unsigned int i;
+
+	job->num_unpins = 0;
+	for (i = 0; i < job->num_relocs; i++)
+		if (pin_bo(job, &job->relocarray[i].target))
+			goto unpin;
+
+	for (i = 0; i < job->num_gathers; i++)
+		if (pin_bo(job, &job->gathers[i].bo))
+			goto unpin;
+
+	return job->num_unpins;
+
+unpin:
+	host1x_job_unpin(job);
+	return 0;
+}
+
+/*
+ * Patch the pinned target address into @cmdbuf for each reloc pointing into
+ * it, clearing reloc->cmdbuf afterwards. Returns 0 or -ENOMEM (kmap failed).
+ */
+static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
+{
+	unsigned int i;
+	u32 last_page = ~0;
+	void *cmdbuf_page_addr = NULL;
+
+	for (i = 0; i < job->num_relocs; i++) {
+		struct host1x_reloc *reloc = &job->relocarray[i];
+		u32 page = reloc->cmdbuf_offset >> PAGE_SHIFT;
+		u32 reloc_addr = (job->reloc_addr_phys[i] +
+			reloc->target_offset) >> reloc->shift;
+		u32 *target;
+
+		/* skip relocs of other gathers and already patched ones */
+		if (!reloc->cmdbuf || cmdbuf != reloc->cmdbuf)
+			continue;
+
+		/* relocs in the page that is already mapped reuse the mapping */
+		if (last_page != page) {
+			if (cmdbuf_page_addr)
+				host1x_bo_kunmap(cmdbuf, last_page,
+						 cmdbuf_page_addr);
+
+			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, page);
+			last_page = page;
+			if (unlikely(!cmdbuf_page_addr)) {
+				pr_err("Could not map cmdbuf for relocation\n");
+				return -ENOMEM;
+			}
+		}
+
+		target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK);
+		*target = reloc_addr;
+		/* mark this reloc as handled */
+		reloc->cmdbuf = NULL;
+	}
+
+	if (cmdbuf_page_addr)
+		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
+
+	return 0;
+}
+
+/* Return 0 if @reloc patches word @offset of @cmdbuf, -EINVAL otherwise. */
+static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
+		       unsigned int offset)
+{
+	/* @offset counts 32-bit words, cmdbuf_offset counts bytes */
+	unsigned int byte_offset = offset * sizeof(u32);
+
+	return (reloc->cmdbuf == cmdbuf &&
+		reloc->cmdbuf_offset == byte_offset) ? 0 : -EINVAL;
+}
+
+struct host1x_firewall {
+	struct host1x_job *job;
+	struct device *dev;
+
+	unsigned int num_relocs;	/* relocs not yet matched */
+	struct host1x_reloc *reloc;	/* next reloc expected to match */
+
+	struct host1x_bo *cmdbuf_id;	/* buffer being validated */
+	unsigned int offset;		/* current position, in 32-bit words */
+
+	u32 words;			/* words left in the gather */
+	u32 class;			/* class from the last opcode 0 seen */
+	u32 reg;			/* first register of current opcode */
+	u32 mask;			/* register mask of current opcode */
+	u32 count;			/* data word count of current opcode */
+};
+
+/*
+ * Validate one data word per set bit of fw->mask; bit n is fw->reg + n.
+ */
+static int check_mask(struct host1x_firewall *fw)
+{
+	u32 mask = fw->mask;
+	u32 reg = fw->reg;
+
+	for (; mask; mask >>= 1, reg++) {
+		if (fw->words == 0)
+			return -EINVAL;
+		if (!(mask & 1))
+			continue;
+
+		if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) {
+			/* count first: fw->reloc may point past the array */
+			if (!fw->num_relocs ||
+			    check_reloc(fw->reloc, fw->cmdbuf_id, fw->offset))
+				return -EINVAL;
+			fw->reloc++;
+			fw->num_relocs--;
+		}
+		fw->words--;
+		fw->offset++;
+	}
+
+	return 0;
+}
+
+/* Validate fw->count data words for consecutive registers from fw->reg. */
+static int check_incr(struct host1x_firewall *fw)
+{
+	u32 count = fw->count;
+	u32 reg = fw->reg;
+
+	/* one iteration per data word announced by the opcode */
+	for (; count; count--, reg++) {
+		if (fw->words == 0)
+			return -EINVAL;
+
+		if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) {
+			/* count first: fw->reloc may point past the array */
+			if (!fw->num_relocs ||
+			    check_reloc(fw->reloc, fw->cmdbuf_id, fw->offset))
+				return -EINVAL;
+			fw->reloc++;
+			fw->num_relocs--;
+		}
+		fw->words--;
+		fw->offset++;
+	}
+
+	return 0;
+}
+
+/* Validate fw->count data words that all go to the register fw->reg. */
+static int check_nonincr(struct host1x_firewall *fw)
+{
+	int is_addr_reg = fw->job->is_addr_reg(fw->dev, fw->class, fw->reg);
+	u32 count = fw->count;
+
+	for (; count; count--) {
+		if (fw->words == 0)
+			return -EINVAL;
+
+		if (is_addr_reg) {
+			/* count first: fw->reloc may point past the array */
+			if (!fw->num_relocs ||
+			    check_reloc(fw->reloc, fw->cmdbuf_id, fw->offset))
+				return -EINVAL;
+			fw->reloc++;
+			fw->num_relocs--;
+		}
+		fw->words--;
+		fw->offset++;
+	}
+
+	return 0;
+}
+
+/*
+ * Firewall check of one gather: walk its opcodes and require each write to
+ * an address register (per job->is_addr_reg) to be matched by the job's next
+ * reloc. Returns 0 (also without a callback), -ENOMEM if unmappable, -EINVAL.
+ */
+static int validate(struct host1x_job *job, struct device *dev,
+		    struct host1x_job_gather *g)
+{
+	struct host1x_firewall fw;
+	u32 *cmdbuf_base;
+	int err = 0;
+
+	if (!job->is_addr_reg)
+		return 0;
+
+	cmdbuf_base = host1x_bo_mmap(g->bo);
+	if (!cmdbuf_base)
+		return -ENOMEM;
+
+	fw.job = job;
+	fw.dev = dev;
+	fw.reloc = job->relocarray;
+	fw.num_relocs = job->num_relocs;
+	fw.cmdbuf_id = g->bo;
+	fw.class = 0;
+	/*
+	 * NOTE(review): reading starts at word 0 of the mapping; g->offset is
+	 * not applied here - confirm that this is intended.
+	 */
+	fw.offset = 0;
+	fw.words = g->words;
+
+	while (fw.words) {
+		u32 word = cmdbuf_base[fw.offset];
+		u32 opcode = (word & 0xf0000000) >> 28;
+
+		fw.mask = 0;
+		fw.reg = 0;
+		fw.count = 0;
+		fw.words--;
+		fw.offset++;
+
+		switch (opcode) {
+		case 0:		/* sets the class; 6-bit register mask */
+			fw.class = word >> 6 & 0x3ff;
+			fw.mask = word & 0x3f;
+			fw.reg = word >> 16 & 0xfff;
+			err = check_mask(&fw);
+			break;
+		case 1:		/* count words, consecutive registers */
+			fw.reg = word >> 16 & 0xfff;
+			fw.count = word & 0xffff;
+			err = check_incr(&fw);
+			break;
+		case 2:		/* count words, one register */
+			fw.reg = word >> 16 & 0xfff;
+			fw.count = word & 0xffff;
+			err = check_nonincr(&fw);
+			break;
+		case 3:		/* 16-bit register mask */
+			fw.mask = word & 0xffff;
+			fw.reg = word >> 16 & 0xfff;
+			err = check_mask(&fw);
+			break;
+		case 4:
+		case 5:
+		case 14:	/* opcodes accepted without any data words */
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			goto out;
+	}
+
+	/* No relocs should remain at this point */
+	if (fw.num_relocs)
+		err = -EINVAL;
+
+out:
+	host1x_bo_munmap(g->bo, cmdbuf_base);
+
+	return err;
+}
+
+/*
+ * Copy every gather into one write-combined DMA buffer and retarget the
+ * gather (base, offset, bo = NULL) at its copy. Returns 0 or -ENOMEM.
+ */
+static inline int copy_gathers(struct host1x_job *job, struct device *dev)
+{
+	size_t size = 0;
+	size_t offset = 0;
+	int i;
+
+	for (i = 0; i < job->num_gathers; i++)
+		size += job->gathers[i].words * sizeof(u32);
+
+	job->gather_copy_mapped = dma_alloc_writecombine(dev, size,
+							 &job->gather_copy,
+							 GFP_KERNEL);
+	/* failure is reported as NULL, so PTR_ERR() would yield 0 (success) */
+	if (!job->gather_copy_mapped)
+		return -ENOMEM;
+
+	job->gather_copy_size = size;
+
+	for (i = 0; i < job->num_gathers; i++) {
+		struct host1x_job_gather *g = &job->gathers[i];
+		size_t bytes = g->words * sizeof(u32);
+		void *gather = host1x_bo_mmap(g->bo);
+
+		if (!gather)
+			return -ENOMEM;
+		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
+		       bytes);
+		host1x_bo_munmap(g->bo, gather);
+		g->base = job->gather_copy;
+		g->offset = offset;
+		g->bo = NULL;
+		offset += bytes;
+	}
+
+	return 0;
+}
+
+/* Pin, validate and patch a job for submission. Returns 0 or -errno. */
+int host1x_job_pin(struct host1x_job *job, struct device *dev)
+{
+	int err = 0;
+	unsigned int i, j;
+	struct host1x *host = dev_get_drvdata(dev->parent);
+	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
+
+	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
+	for (i = 0; i < job->num_waitchk; i++) {
+		u32 syncpt_id = job->waitchk[i].syncpt_id;
+		if (syncpt_id < host1x_syncpt_nb_pts(host))
+			set_bit(syncpt_id, waitchk_mask);
+	}
+
+	/* get current syncpt values for waitchk */
+	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
+		host1x_syncpt_load(host->syncpt + i);
+
+	/* pin memory; a count of 0 means failure unless nothing was to pin */
+	if (!pin_job(job)) {
+		if (job->num_relocs || job->num_gathers)
+			err = -EINVAL;
+		goto out;
+	}
+
+	/* patch gathers */
+	for (i = 0; i < job->num_gathers; i++) {
+		struct host1x_job_gather *g = &job->gathers[i];
+
+		/* process each gather mem only once */
+		if (g->handled)
+			continue;
+
+		g->base = job->gather_addr_phys[i];
+
+		/* gathers sharing this bo share its address and its patching */
+		for (j = 0; j < job->num_gathers; j++) {
+			if (job->gathers[j].bo == g->bo) {
+				job->gathers[j].handled = true;
+				job->gathers[j].base = g->base;
+			}
+		}
+
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			err = validate(job, dev, g);
+		if (err)
+			dev_err(dev, "Job invalid (err=%d)\n", err);
+		else
+			err = do_relocs(job, g->bo);
+		if (!err)
+			err = do_waitchks(job, host, g->bo);
+		if (err)
+			break;
+	}
+
+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err)
+		err = copy_gathers(job, dev);
+
+	/* a failed pin must not leave any buffer pinned behind */
+	if (err)
+		host1x_job_unpin(job);
+
+out:
+	wmb();
+
+	return err;
+}
+
+void host1x_job_unpin(struct host1x_job *job)
+{
+	unsigned int i;
+
+	for (i = 0; i < job->num_unpins; i++) {
+		host1x_bo_unpin(job->unpins[i].bo, job->unpins[i].sgt);
+		host1x_bo_put(job->unpins[i].bo);
+	}
+	job->num_unpins = 0;	/* nothing left to unpin on a repeated call */
+
+	if (!job->gather_copy_size)
+		return;
+	dma_free_writecombine(job->channel->dev, job->gather_copy_size,
+			      job->gather_copy_mapped, job->gather_copy);
+	job->gather_copy_size = 0;	/* a second unpin must not free again */
+}
+
+/*
+ * Debug routine: print the job's sync point, push buffer and pin state
+ */
+void host1x_job_dump(struct device *dev, struct host1x_job *job)
+{
+	dev_dbg(dev, "    SYNCPT_ID   %u\n", job->syncpt_id);
+	dev_dbg(dev, "    SYNCPT_VAL  %u\n", job->syncpt_end);
+	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
+	dev_dbg(dev, "    TIMEOUT     %u\n", job->timeout);
+	dev_dbg(dev, "    NUM_SLOTS   %u\n", job->num_slots);
+	dev_dbg(dev, "    NUM_HANDLES %u\n", job->num_unpins);
+}
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
new file mode 100644
index 000000000000..fba45f20458e
--- /dev/null
+++ b/drivers/gpu/host1x/job.h
@@ -0,0 +1,162 @@
+/*
+ * Tegra host1x Job
+ *
+ * Copyright (c) 2011-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_JOB_H
+#define __HOST1X_JOB_H
+
+struct host1x_job_gather {
+	u32 words;		/* length of the gather in 32-bit words */
+	dma_addr_t base;	/* DMA address, set by host1x_job_pin() */
+	struct host1x_bo *bo;	/* backing buffer; NULL once copied */
+	int offset;		/* byte offset of the gather in bo or copy */
+	bool handled;		/* bo already processed by host1x_job_pin() */
+};
+
+struct host1x_cmdbuf {
+	u32 handle;
+	u32 offset;
+	u32 words;
+	u32 pad;
+};
+
+struct host1x_reloc {
+	struct host1x_bo *cmdbuf;	/* buffer to patch; cleared when done */
+	u32 cmdbuf_offset;		/* byte offset of the word to patch */
+	struct host1x_bo *target;	/* buffer whose address is written */
+	u32 target_offset;		/* added to the target's address */
+	u32 shift;			/* right shift applied to the sum */
+	u32 pad;
+};
+
+struct host1x_waitchk {
+	struct host1x_bo *bo;	/* buffer holding the wait; NULL once checked */
+	u32 offset;		/* byte offset of the wait inside bo */
+	u32 syncpt_id;		/* sync point the wait refers to */
+	u32 thresh;		/* value the wait is waiting for */
+};
+
+struct host1x_job_unpin_data {
+	struct host1x_bo *bo;	/* pinned buffer; put again on unpin */
+	struct sg_table *sgt;	/* as filled in by host1x_bo_pin() */
+};
+
+/*
+ * Each submit is tracked as a host1x_job.
+ */
+struct host1x_job {
+	/* When refcount goes to zero, job can be freed */
+	struct kref ref;
+
+	/* List entry */
+	struct list_head list;
+
+	/* Channel where job is submitted to */
+	struct host1x_channel *channel;
+
+	u32 client;
+
+	/* Gathers and their memory */
+	struct host1x_job_gather *gathers;
+	unsigned int num_gathers;
+
+	/* Wait checks to be processed at submit time */
+	struct host1x_waitchk *waitchk;
+	unsigned int num_waitchk;
+	u32 waitchk_mask;
+
+	/* Array of handles to be pinned & unpinned */
+	struct host1x_reloc *relocarray;
+	unsigned int num_relocs;
+	struct host1x_job_unpin_data *unpins;
+	unsigned int num_unpins;
+
+	dma_addr_t *addr_phys;
+	dma_addr_t *gather_addr_phys;
+	dma_addr_t *reloc_addr_phys;
+
+	/* Sync point id, number of increments and end related to the submit */
+	u32 syncpt_id;
+	u32 syncpt_incrs;
+	u32 syncpt_end;
+
+	/* Maximum time to wait for this job */
+	unsigned int timeout;
+
+	/* Index and number of slots used in the push buffer */
+	unsigned int first_get;
+	unsigned int num_slots;
+
+	/* Copy of gathers */
+	size_t gather_copy_size;
+	dma_addr_t gather_copy;
+	u8 *gather_copy_mapped;
+
+	/* Check if register @reg of @class is an address reg (class first) */
+	int (*is_addr_reg)(struct device *dev, u32 class, u32 reg);
+
+	/* Request a SETCLASS to this class */
+	u32 class;
+
+	/* Add a channel wait for previous ops to complete */
+	bool serialize;
+};
+/*
+ * Allocate memory for a job. Just enough memory will be allocated to
+ * accommodate the submit.
+ */
+struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
+				    u32 num_cmdbufs, u32 num_relocs,
+				    u32 num_waitchks);
+
+/*
+ * Add a gather to a job.
+ */
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
+			   u32 words, u32 offset);
+
+/*
+ * Increment reference going to host1x_job.
+ */
+struct host1x_job *host1x_job_get(struct host1x_job *job);
+
+/*
+ * Decrement reference job, free if goes to zero.
+ */
+void host1x_job_put(struct host1x_job *job);
+
+/*
+ * Pin memory related to job. This handles relocation of addresses to the
+ * host1x address space. Handles both the gather memory and any other memory
+ * referred to from the gather buffers.
+ *
+ * Handles also patching out host waits that would wait for an expired sync
+ * point value.
+ */
+int host1x_job_pin(struct host1x_job *job, struct device *dev);
+
+/*
+ * Unpin memory related to job.
+ */
+void host1x_job_unpin(struct host1x_job *job);
+
+/*
+ * Dump contents of job to debug output.
+ */
+void host1x_job_dump(struct device *dev, struct host1x_job *job);
+
+#endif
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
new file mode 100644
index 000000000000..4b493453e805
--- /dev/null
+++ b/drivers/gpu/host1x/syncpt.c
@@ -0,0 +1,387 @@
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+
+#include <trace/events/host1x.h>
+
+#include "syncpt.h"
+#include "dev.h"
+#include "intr.h"
+#include "debug.h"
+
+#define SYNCPT_CHECK_PERIOD (2 * HZ)
+#define MAX_STUCK_CHECK_COUNT 15
+
+/* Claim the first unnamed sync point for @dev; NULL if none can be had. */
+static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
+						  struct device *dev,
+						  int client_managed)
+{
+	int i;
+	struct host1x_syncpt *sp = host->syncpt;
+
+	for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++)
+		;
+	/* all in use: sp is one past the array and must not be read */
+	if (i >= host->info->nb_pts || sp->dev)
+		return NULL;
+
+	sp->name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
+			dev ? dev_name(dev) : NULL);
+	if (!sp->name)
+		return NULL;
+
+	sp->dev = dev;
+	sp->client_managed = client_managed;
+
+	return sp;
+}
+
+u32 host1x_syncpt_id(struct host1x_syncpt *sp)
+{
+	return sp->id;
+}
+
+/*
+ * Adds @incrs to the cached max value and returns the new max.
+ */
+u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
+{
+	return (u32)atomic_add_return(incrs, &sp->max_val);
+}
+
+/*
+ * Write cached syncpoint and waitbase values to hardware.
+ */
+void host1x_syncpt_restore(struct host1x *host)
+{
+	u32 i;
+
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+		host1x_hw_syncpt_restore(host, &host->syncpt[i]);
+	/* wait bases are taken from the first nb_bases sync point entries */
+	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
+		host1x_hw_syncpt_restore_wait_base(host, &host->syncpt[i]);
+	wmb();
+}
+
+/*
+ * Refresh the cached syncpoint and waitbase values from the registers.
+ */
+void host1x_syncpt_save(struct host1x *host)
+{
+	u32 i;
+
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+		struct host1x_syncpt *sp = &host->syncpt[i];
+
+		if (!host1x_syncpt_client_managed(sp))
+			WARN_ON(!host1x_syncpt_idle(sp));
+		else
+			host1x_hw_syncpt_load(host, sp);
+	}
+
+	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
+		host1x_hw_syncpt_load_wait_base(host, &host->syncpt[i]);
+}
+
+/*
+ * Updates the cached syncpoint value by reading the hardware register and
+ * returns the result of that load
+ */
+u32 host1x_syncpt_load(struct host1x_syncpt *sp)
+{
+	u32 val = host1x_hw_syncpt_load(sp->host, sp);
+
+	trace_host1x_syncpt_load_min(sp->id, val);
+
+	return val;
+}
+
+/*
+ * Get the current syncpoint base
+ */
+u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
+{
+	/* presumably refreshes sp->base_val, which is what gets returned */
+	host1x_hw_syncpt_load_wait_base(sp->host, sp);
+
+	return sp->base_val;
+}
+
+/*
+ * Write a cpu syncpoint increment to the hardware, without touching
+ * the cache. Caller is responsible for host being powered.
+ */
+void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp)
+{
+	host1x_hw_syncpt_cpu_incr(sp->host, sp);
+}
+
+/*
+ * Increment syncpoint value from cpu; client managed ones also bump max_val
+ */
+void host1x_syncpt_incr(struct host1x_syncpt *sp)
+{
+	if (host1x_syncpt_client_managed(sp))
+		host1x_syncpt_incr_max(sp, 1);
+	host1x_syncpt_cpu_incr(sp);
+}
+
+/*
+ * Updates the sync point from hardware, and returns true if the sync point
+ * is expired, false if we may need to wait
+ */
+static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
+{
+	host1x_hw_syncpt_load(sp->host, sp);
+	return host1x_syncpt_is_expired(sp, thresh);
+}
+
+/*
+ * Wait for @sp to reach @thresh: 0, -EAGAIN on timeout, or another -errno.
+ */
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
+			u32 *value)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+	void *ref;
+	struct host1x_waitlist *waiter;
+	int err = 0, check_count = 0;
+	u32 val;
+
+	if (value)
+		*value = 0;	/* stays 0 unless the threshold is reached */
+
+	/* first check cache */
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = host1x_syncpt_load(sp);
+		return 0;
+	}
+
+	/* try to read from register */
+	val = host1x_hw_syncpt_load(sp->host, sp);
+	if (host1x_syncpt_is_expired(sp, thresh)) {
+		if (value)
+			*value = val;
+		goto done;
+	}
+
+	if (!timeout) {		/* a zero timeout only polls */
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* allocate a waiter */
+	waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
+	if (!waiter) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* schedule a wakeup when the syncpoint value is reached */
+	err = host1x_intr_add_action(sp->host, sp->id, thresh,
+				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+				     &wq, waiter, &ref);
+	if (err)
+		goto done;	/* NOTE(review): who frees waiter here? */
+
+	err = -EAGAIN;		/* result if the timeout runs out */
+	/* a negative timeout means: wait (practically) without limit */
+	if (timeout < 0)
+		timeout = LONG_MAX;
+
+	/* wait in SYNCPT_CHECK_PERIOD slices for syncpoint, timeout or signal */
+	while (timeout) {
+		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
+		int remain = wait_event_interruptible_timeout(wq,
+				syncpt_load_min_is_expired(sp, thresh),
+				check);
+		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
+			if (value)
+				*value = host1x_syncpt_load(sp);
+			err = 0;
+			break;
+		}
+		if (remain < 0) {	/* negative return from the wait */
+			err = remain;
+			break;
+		}
+		timeout -= check;
+		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
+			dev_warn(sp->host->dev,
+				"%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n",
+				 current->comm, sp->id, sp->name,
+				 thresh, timeout);
+
+			host1x_debug_dump_syncpts(sp->host);
+			if (check_count == MAX_STUCK_CHECK_COUNT)
+				host1x_debug_dump(sp->host);	/* full dump once */
+			check_count++;
+		}
+	}
+	host1x_intr_put_ref(sp->host, sp->id, ref);
+
+done:
+	return err;
+}
+EXPORT_SYMBOL(host1x_syncpt_wait);
+
+/*
+ * Returns true if syncpoint is expired, false if we may need to wait
+ */
+bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
+{
+	u32 current_val;
+	u32 future_val;
+	smp_rmb();
+	current_val = (u32)atomic_read(&sp->min_val);
+	future_val = (u32)atomic_read(&sp->max_val);
+
+	/* Note the use of unsigned arithmetic here (mod 1<<32).
+	 *
+	 * c = current_val = min_val	= the current value of the syncpoint.
+	 * t = thresh			= the value we are checking
+	 * f = future_val  = max_val	= the value c will reach when all
+	 *				  outstanding increments have completed.
+	 *
+	 * Note that c always chases f until it reaches f.
+	 *
+	 * Dtf = (f - t)
+	 * Dtc = (c - t)
+	 *
+	 *  Consider all cases:
+	 *
+	 *	A) .....c..t..f.....	Dtf < Dtc	need to wait
+	 *	B) .....c.....f..t..	Dtf > Dtc	expired
+	 *	C) ..t..c.....f.....	Dtf > Dtc	expired	   (Dct very large)
+	 *
+	 *  Any case where f==c: always expired (for any t).	Dtf == Dtc
+	 *  Any case where t==c: always expired (for any f).	Dtf >= Dtc (because Dtc==0)
+	 *  Any case where t==f!=c: always wait.		Dtf <  Dtc (because Dtf==0,
+	 *							Dtc!=0)
+	 *
+	 *  Other cases:
+	 *
+	 *	D) .....t..f..c.....	Dtf < Dtc	need to wait
+	 *	E) .....f..c..t.....	Dtf < Dtc	need to wait
+	 *	F) .....f..t..c.....	Dtf > Dtc	expired
+	 *
+	 *   So:
+	 *	   Dtf >= Dtc implies EXPIRED	(return true)
+	 *	   Dtf <  Dtc implies WAIT	(return false)
+	 *
+	 * Note: If t is expired then we *cannot* wait on it. We would wait
+	 * forever (hang the system).
+	 *
+	 * Note: do NOT get clever and remove the -thresh from both sides. It
+	 * is NOT the same.
+	 *
+	 * For a client managed sync point future_val is not used. In that
+	 * case we do a direct (signed) comparison.
+	 */
+	if (!host1x_syncpt_client_managed(sp))
+		return future_val - thresh >= current_val - thresh;
+	else
+		return (s32)(current_val - thresh) >= 0;
+}
+
+/* remove a wait pointed to by patch_addr */
+int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
+{
+	return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr);
+}
+
+int host1x_syncpt_init(struct host1x *host)
+{
+	struct host1x_syncpt *syncpt;
+	int i;
+
+	syncpt = devm_kzalloc(host->dev, sizeof(*syncpt) * host->info->nb_pts,
+		GFP_KERNEL);
+	if (!syncpt)
+		return -ENOMEM;
+
+	for (i = 0; i < host->info->nb_pts; ++i) {
+		syncpt[i].id = i;	/* the id is the array index */
+		syncpt[i].host = host;
+	}
+
+	host->syncpt = syncpt;
+
+	host1x_syncpt_restore(host);	/* cache is all zeroes here */
+
+	/* Allocate sync point to use for clearing waits for expired fences */
+	host->nop_sp = _host1x_syncpt_alloc(host, NULL, 0);
+	if (!host->nop_sp)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
+					    int client_managed)
+{
+	struct host1x *host = dev_get_drvdata(dev->parent);
+	return _host1x_syncpt_alloc(host, dev, client_managed);
+}
+
+void host1x_syncpt_free(struct host1x_syncpt *sp)
+{
+	if (!sp)
+		return;
+
+	kfree(sp->name);
+	sp->dev = NULL;
+	sp->name = NULL;
+	sp->client_managed = 0;
+}
+
+void host1x_syncpt_deinit(struct host1x *host)
+{
+	int i;
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		kfree(host->syncpt[i].name);
+}
+
+int host1x_syncpt_nb_pts(struct host1x *host)
+{
+	return host->info->nb_pts;
+}
+
+int host1x_syncpt_nb_bases(struct host1x *host)
+{
+	return host->info->nb_bases;
+}
+
+int host1x_syncpt_nb_mlocks(struct host1x *host)
+{
+	return host->info->nb_mlocks;
+}
+
+/* Return sync point @id (0 .. nb_pts - 1), or NULL if @id is out of range. */
+struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id)
+{
+	if (id >= host->info->nb_pts)
+		return NULL;
+	return host->syncpt + id;
+}
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
new file mode 100644
index 000000000000..c99806130f2e
--- /dev/null
+++ b/drivers/gpu/host1x/syncpt.h
@@ -0,0 +1,165 @@
+/*
+ * Tegra host1x Syncpoints
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_SYNCPT_H
+#define __HOST1X_SYNCPT_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "intr.h"
+
+struct host1x;
+
+/* Reserved for replacing an expired wait with a NOP */
+#define HOST1X_SYNCPT_RESERVED			0
+
+struct host1x_syncpt {
+	int id;				/* index into host->syncpt[] */
+	atomic_t min_val;		/* shadow of the hardware value */
+	atomic_t max_val;		/* value reached once all queued incrs ran */
+	u32 base_val;			/* cached wait base */
+	const char *name;		/* kasprintf()ed; NULL while unallocated */
+	int client_managed;
+	struct host1x *host;
+	struct device *dev;		/* owner; NULL for the host's own syncpt */
+
+	/* interrupt data */
+	struct host1x_syncpt_intr intr;
+};
+
+/* Initialize sync point array  */
+int host1x_syncpt_init(struct host1x *host);
+
+/*  Free sync point array */
+void host1x_syncpt_deinit(struct host1x *host);
+
+/*
+ * Read max. It indicates how many operations there are in queue, either in
+ * channel or in a software thread.
+ * */
+static inline u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+	return (u32)atomic_read(&sp->max_val);
+}
+
+/*
+ * Read min, which is a shadow of the current sync point value in hardware.
+ */
+static inline u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+	return (u32)atomic_read(&sp->min_val);
+}
+
+/* Return number of sync point supported. */
+int host1x_syncpt_nb_pts(struct host1x *host);
+
+/* Return number of wait bases supported. */
+int host1x_syncpt_nb_bases(struct host1x *host);
+
+/* Return number of mlocks supported. */
+int host1x_syncpt_nb_mlocks(struct host1x *host);
+
+/*
+ * Check sync point sanity: returns true if @real has not run ahead of max,
+ * i.e. there have not been more increments than were announced.
+ *
+ * Client managed sync points are not tracked, so they always pass.
+ */
+static inline bool host1x_syncpt_check_max(struct host1x_syncpt *sp, u32 real)
+{
+	u32 max;
+	if (sp->client_managed)
+		return true;
+	max = host1x_syncpt_read_max(sp);
+	return (s32)(max - real) >= 0;
+}
+
+/* Return true if sync point is client managed. */
+static inline int host1x_syncpt_client_managed(struct host1x_syncpt *sp)
+{
+	return sp->client_managed;
+}
+
+/*
+ * Returns true if syncpoint min == max, which means that there are no
+ * outstanding operations.
+ */
+static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp)
+{
+	int min, max;
+	smp_rmb();
+	min = atomic_read(&sp->min_val);
+	max = atomic_read(&sp->max_val);
+	return (min == max);
+}
+
+/* Return pointer to struct denoting sync point id. */
+struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id);
+
+/* Request incrementing a sync point. */
+void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp);
+
+/* Load current value from hardware to the shadow register. */
+u32 host1x_syncpt_load(struct host1x_syncpt *sp);
+
+/* Check if the given syncpoint value has already passed */
+bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh);
+
+/* Save host1x sync point state into shadow registers. */
+void host1x_syncpt_save(struct host1x *host);
+
+/* Reset host1x sync point state from shadow registers. */
+void host1x_syncpt_restore(struct host1x *host);
+
+/* Read current wait base value into shadow register and return it. */
+u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp);
+
+/* Increment sync point and its max. */
+void host1x_syncpt_incr(struct host1x_syncpt *sp);
+
+/* Indicate future operations by incrementing the sync point max. */
+u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
+
+/* Wait until sync point reaches a threshold value, or a timeout. */
+int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh,
+			long timeout, u32 *value);
+
+/* Check if sync point id is valid. */
+static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
+{
+	return sp->id < host1x_syncpt_nb_pts(sp->host);
+}
+
+/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */
+int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr);
+
+/* Return id of the sync point */
+u32 host1x_syncpt_id(struct host1x_syncpt *sp);
+
+/* Allocate a sync point for a device. */
+struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
+		int client_managed);
+
+/* Free a sync point. */
+void host1x_syncpt_free(struct host1x_syncpt *sp);
+
+#endif
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 4c1546f71d56..3e0e3f088686 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -21,6 +21,8 @@ source "drivers/gpu/vga/Kconfig"
 
 source "drivers/gpu/drm/Kconfig"
 
+source "drivers/gpu/host1x/Kconfig"
+
 config VGASTATE
        tristate
        default n
diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h
new file mode 100644
index 000000000000..94db6a2c3540
--- /dev/null
+++ b/include/trace/events/host1x.h
@@ -0,0 +1,253 @@
+/*
+ * include/trace/events/host1x.h
+ *
+ * host1x event logging to ftrace.
+ *
+ * Copyright (c) 2010-2013, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM host1x
+
+#if !defined(_TRACE_HOST1X_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HOST1X_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(host1x,
+	TP_PROTO(const char *name),
+	TP_ARGS(name),
+	TP_STRUCT__entry(__field(const char *, name)),
+	TP_fast_assign(__entry->name = name;),
+	TP_printk("name=%s", __entry->name)
+);
+
+DEFINE_EVENT(host1x, host1x_channel_open,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
+DEFINE_EVENT(host1x, host1x_channel_release,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
+DEFINE_EVENT(host1x, host1x_cdma_begin,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
+DEFINE_EVENT(host1x, host1x_cdma_end,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
+TRACE_EVENT(host1x_cdma_push,
+	TP_PROTO(const char *name, u32 op1, u32 op2),
+
+	TP_ARGS(name, op1, op2),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, op1)
+		__field(u32, op2)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->op1 = op1;
+		__entry->op2 = op2;
+	),
+
+	TP_printk("name=%s, op1=%08x, op2=%08x",
+		__entry->name, __entry->op1, __entry->op2)
+);
+
+TRACE_EVENT(host1x_cdma_push_gather,
+	TP_PROTO(const char *name, u32 mem_id,
+			u32 words, u32 offset, void *cmdbuf),
+
+	TP_ARGS(name, mem_id, words, offset, cmdbuf),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, mem_id)
+		__field(u32, words)
+		__field(u32, offset)
+		__field(bool, cmdbuf)
+		__dynamic_array(u32, cmdbuf, words)
+	),
+
+	TP_fast_assign(
+		if (cmdbuf) {
+			memcpy(__get_dynamic_array(cmdbuf), cmdbuf+offset,
+					words * sizeof(u32));
+		}
+		__entry->cmdbuf = cmdbuf;
+		__entry->name = name;
+		__entry->mem_id = mem_id;
+		__entry->words = words;
+		__entry->offset = offset;
+	),
+
+	TP_printk("name=%s, mem_id=%08x, words=%u, offset=%u, contents=[%s]",
+	  __entry->name, __entry->mem_id,
+	  __entry->words, __entry->offset,
+	  __print_hex(__get_dynamic_array(cmdbuf),
+		  __entry->cmdbuf ? __entry->words * 4 : 0))
+);
+
+TRACE_EVENT(host1x_channel_submit,
+	TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 waitchks,
+			u32 syncpt_id, u32 syncpt_incrs),
+
+	TP_ARGS(name, cmdbufs, relocs, waitchks, syncpt_id, syncpt_incrs),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, cmdbufs)
+		__field(u32, relocs)
+		__field(u32, waitchks)
+		__field(u32, syncpt_id)
+		__field(u32, syncpt_incrs)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->cmdbufs = cmdbufs;
+		__entry->relocs = relocs;
+		__entry->waitchks = waitchks;
+		__entry->syncpt_id = syncpt_id;
+		__entry->syncpt_incrs = syncpt_incrs;
+	),
+
+	TP_printk("name=%s, cmdbufs=%u, relocs=%u, waitchks=%u, "
+		"syncpt_id=%u, syncpt_incrs=%u",
+	  __entry->name, __entry->cmdbufs, __entry->relocs, __entry->waitchks,
+	  __entry->syncpt_id, __entry->syncpt_incrs)
+);
+
+TRACE_EVENT(host1x_channel_submitted,
+	TP_PROTO(const char *name, u32 syncpt_base, u32 syncpt_max),
+
+	TP_ARGS(name, syncpt_base, syncpt_max),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, syncpt_base)
+		__field(u32, syncpt_max)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->syncpt_base = syncpt_base;
+		__entry->syncpt_max = syncpt_max;
+	),
+
+	TP_printk("name=%s, syncpt_base=%u, syncpt_max=%u",
+		__entry->name, __entry->syncpt_base, __entry->syncpt_max)
+);
+
+TRACE_EVENT(host1x_channel_submit_complete,
+	TP_PROTO(const char *name, int count, u32 thresh),
+
+	TP_ARGS(name, count, thresh),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(int, count)
+		__field(u32, thresh)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->count = count;
+		__entry->thresh = thresh;
+	),
+
+	TP_printk("name=%s, count=%d, thresh=%u",
+		__entry->name, __entry->count, __entry->thresh)
+);
+
+TRACE_EVENT(host1x_wait_cdma,
+	TP_PROTO(const char *name, u32 eventid),
+
+	TP_ARGS(name, eventid),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, eventid)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->eventid = eventid;
+	),
+
+	TP_printk("name=%s, event=%u", __entry->name, __entry->eventid)
+);
+
+TRACE_EVENT(host1x_syncpt_load_min,
+	TP_PROTO(u32 id, u32 val),
+
+	TP_ARGS(id, val),
+
+	TP_STRUCT__entry(
+		__field(u32, id)
+		__field(u32, val)
+	),
+
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->val = val;
+	),
+
+	TP_printk("id=%u, val=%u", __entry->id, __entry->val)
+);
+
+TRACE_EVENT(host1x_syncpt_wait_check,
+	TP_PROTO(void *mem_id, u32 offset, u32 syncpt_id, u32 thresh, u32 min),
+
+	TP_ARGS(mem_id, offset, syncpt_id, thresh, min),
+
+	TP_STRUCT__entry(
+		__field(void *, mem_id)
+		__field(u32, offset)
+		__field(u32, syncpt_id)
+		__field(u32, thresh)
+		__field(u32, min)
+	),
+
+	TP_fast_assign(
+		__entry->mem_id = mem_id;
+		__entry->offset = offset;
+		__entry->syncpt_id = syncpt_id;
+		__entry->thresh = thresh;
+		__entry->min = min;
+	),
+
+	TP_printk("mem_id=%p, offset=%05x, id=%u, thresh=%u, current=%u",
+		__entry->mem_id, __entry->offset,
+		__entry->syncpt_id, __entry->thresh,
+		__entry->min)
+);
+
+#endif /*  _TRACE_HOST1X_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index a042a957296d..119487e05e65 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -13,5 +13,6 @@ header-y += r128_drm.h
 header-y += radeon_drm.h
 header-y += savage_drm.h
 header-y += sis_drm.h
+header-y += tegra_drm.h
 header-y += via_drm.h
 header-y += vmwgfx_drm.h
diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
new file mode 100644
index 000000000000..6e132a2f7420
--- /dev/null
+++ b/include/uapi/drm/tegra_drm.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _UAPI_TEGRA_DRM_H_
+#define _UAPI_TEGRA_DRM_H_
+
+struct drm_tegra_gem_create {
+	__u64 size;
+	__u32 flags;
+	__u32 handle;
+};
+
+struct drm_tegra_gem_mmap {
+	__u32 handle;
+	__u32 offset;
+};
+
+struct drm_tegra_syncpt_read {
+	__u32 id;
+	__u32 value;
+};
+
+struct drm_tegra_syncpt_incr {
+	__u32 id;
+	__u32 pad;
+};
+
+struct drm_tegra_syncpt_wait {
+	__u32 id;
+	__u32 thresh;
+	__u32 timeout;
+	__u32 value;
+};
+
+#define DRM_TEGRA_NO_TIMEOUT	(0xffffffff)
+
+struct drm_tegra_open_channel {
+	__u32 client;
+	__u32 pad;
+	__u64 context;
+};
+
+struct drm_tegra_close_channel {
+	__u64 context;
+};
+
+struct drm_tegra_get_syncpt {
+	__u64 context;
+	__u32 index;
+	__u32 id;
+};
+
+struct drm_tegra_syncpt {
+	__u32 id;
+	__u32 incrs;
+};
+
+struct drm_tegra_cmdbuf {
+	__u32 handle;
+	__u32 offset;
+	__u32 words;
+	__u32 pad;
+};
+
+struct drm_tegra_reloc {
+	struct {
+		__u32 handle;
+		__u32 offset;
+	} cmdbuf;
+	struct {
+		__u32 handle;
+		__u32 offset;
+	} target;
+	__u32 shift;
+	__u32 pad;
+};
+
+struct drm_tegra_waitchk {
+	__u32 handle;
+	__u32 offset;
+	__u32 syncpt;
+	__u32 thresh;
+};
+
+struct drm_tegra_submit {
+	__u64 context;
+	__u32 num_syncpts;
+	__u32 num_cmdbufs;
+	__u32 num_relocs;
+	__u32 num_waitchks;
+	__u32 waitchk_mask;
+	__u32 timeout;
+	__u32 pad;
+	__u64 syncpts;
+	__u64 cmdbufs;
+	__u64 relocs;
+	__u64 waitchks;
+	__u32 fence;		/* Return value */
+
+	__u32 reserved[5];	/* future expansion */
+};
+
+#define DRM_TEGRA_GEM_CREATE	0x00
+#define DRM_TEGRA_GEM_MMAP	0x01
+#define DRM_TEGRA_SYNCPT_READ	0x02
+#define DRM_TEGRA_SYNCPT_INCR	0x03
+#define DRM_TEGRA_SYNCPT_WAIT	0x04
+#define DRM_TEGRA_OPEN_CHANNEL	0x05
+#define DRM_TEGRA_CLOSE_CHANNEL	0x06
+#define DRM_TEGRA_GET_SYNCPT	0x07
+#define DRM_TEGRA_SUBMIT	0x08
+
+#define DRM_IOCTL_TEGRA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE, struct drm_tegra_gem_create)
+#define DRM_IOCTL_TEGRA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP, struct drm_tegra_gem_mmap)
+#define DRM_IOCTL_TEGRA_SYNCPT_READ DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_READ, struct drm_tegra_syncpt_read)
+#define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
+#define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
+#define DRM_IOCTL_TEGRA_OPEN_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, struct drm_tegra_open_channel)
+#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_close_channel)
+#define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
+#define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
+
+#endif