From d1724078d6a01177c1db4ea0b75fda1ca8a73d57 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 24 Jun 2009 19:57:35 +0200
Subject: ttm: Make messages more readable.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/ttm/ttm_module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h
index 889a4c7958ae..d1d433834e4f 100644
--- a/include/drm/ttm/ttm_module.h
+++ b/include/drm/ttm/ttm_module.h
@@ -33,7 +33,7 @@
 
 #include <linux/kernel.h>
 
-#define TTM_PFX "[TTM]"
+#define TTM_PFX "[TTM] "
 
 enum ttm_global_types {
 	TTM_GLOBAL_TTM_MEM = 0,
-- 
cgit v1.2.3


From ad49f501867cba87e1e45e5ebae0b12435d68bf1 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Fri, 10 Jul 2009 22:36:26 +1000
Subject: drm/ttm/radeon: add dma32 support.

This add support for using dma32 memory on gpus that really need it.

Currently IGPs are left without DMA32 but we might need to change
that unless we can fix rs690.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  1 +
 drivers/gpu/drm/radeon/radeon_device.c | 17 +++++++++++++++--
 drivers/gpu/drm/radeon/radeon_ttm.c    |  3 ++-
 drivers/gpu/drm/ttm/ttm_bo.c           |  7 ++++++-
 drivers/gpu/drm/ttm/ttm_tt.c           |  9 +++++++--
 include/drm/ttm/ttm_bo_driver.h        |  5 ++++-
 6 files changed, 35 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e7662ba9abfb..3060ce14071e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -624,6 +624,7 @@ struct radeon_device {
 	bool				gpu_lockup;
 	bool				shutdown;
 	bool				suspend;
+	bool				need_dma32;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 332911267ebe..27a5ac969953 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -450,6 +450,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		       uint32_t flags)
 {
 	int r, ret;
+	int dma_bits;
 
 	DRM_INFO("radeon: Initializing kernel modesetting.\n");
 	rdev->shutdown = false;
@@ -492,8 +493,20 @@ int radeon_device_init(struct radeon_device *rdev,
 		return r;
 	}
 
-	/* Report DMA addressing limitation */
-	r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32));
+	/* set DMA mask + need_dma32 flags.
+	 * PCIE - can handle 40-bits.
+	 * IGP - can handle 40-bits (in theory)
+	 * AGP - generally dma32 is safest
+	 * PCI - only dma32
+	 */
+	rdev->need_dma32 = false;
+	if (rdev->flags & RADEON_IS_AGP)
+		rdev->need_dma32 = true;
+	if (rdev->flags & RADEON_IS_PCI)
+		rdev->need_dma32 = true;
+
+	dma_bits = rdev->need_dma32 ? 32 : 40;
+	r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
 	if (r) {
 		printk(KERN_WARNING "radeon: No suitable DMA available.\n");
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1227a97f5169..4ca9aa9203d0 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -442,7 +442,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&rdev->mman.bdev,
 			       rdev->mman.mem_global_ref.object,
-			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET);
+			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
+			       rdev->need_dma32);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a753598a5e35..e55e7972c897 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -224,6 +224,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 	TTM_ASSERT_LOCKED(&bo->mutex);
 	bo->ttm = NULL;
 
+	if (bdev->need_dma32)
+		page_flags |= TTM_PAGE_FLAG_DMA32;
+
 	switch (bo->type) {
 	case ttm_bo_type_device:
 		if (zero_alloc)
@@ -1332,7 +1335,8 @@ EXPORT_SYMBOL(ttm_bo_device_release);
 
 int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct ttm_mem_global *mem_glob,
-		       struct ttm_bo_driver *driver, uint64_t file_page_offset)
+		       struct ttm_bo_driver *driver, uint64_t file_page_offset,
+		       bool need_dma32)
 {
 	int ret = -EINVAL;
 
@@ -1369,6 +1373,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	INIT_LIST_HEAD(&bdev->swap_lru);
 	bdev->dev_mapping = NULL;
+	bdev->need_dma32 = need_dma32;
 	ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout);
 	ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink);
 	if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 75dc8bd24592..81ab81f030a3 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -131,10 +131,15 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 
 static struct page *ttm_tt_alloc_page(unsigned page_flags)
 {
+	gfp_t gfp_flags = GFP_HIGHUSER;
+
 	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		return alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+		gfp_flags |= __GFP_ZERO;
+
+	if (page_flags & TTM_PAGE_FLAG_DMA32)
+		gfp_flags |= __GFP_DMA32;
 
-	return alloc_page(GFP_HIGHUSER);
+	return alloc_page(gfp_flags);
 }
 
 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 62ed733c52a2..ea83dd23a4d7 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -121,6 +121,7 @@ struct ttm_backend {
 #define TTM_PAGE_FLAG_SWAPPED         (1 << 4)
 #define TTM_PAGE_FLAG_PERSISTANT_SWAP (1 << 5)
 #define TTM_PAGE_FLAG_ZERO_ALLOC      (1 << 6)
+#define TTM_PAGE_FLAG_DMA32           (1 << 7)
 
 enum ttm_caching_state {
 	tt_uncached,
@@ -429,6 +430,8 @@ struct ttm_bo_device {
 	 */
 
 	struct delayed_work wq;
+
+	bool need_dma32;
 };
 
 /**
@@ -648,7 +651,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
 extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
 			      struct ttm_mem_global *mem_glob,
 			      struct ttm_bo_driver *driver,
-			      uint64_t file_page_offset);
+			      uint64_t file_page_offset, bool need_dma32);
 
 /**
  * ttm_bo_reserve:
-- 
cgit v1.2.3


From 5920dadfb4aec6c1372c5570e71bcd3b4837e63c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 15 Jul 2009 17:11:41 +0900
Subject: libata: accept late unlocking of HPA

On certain configurations, HPA isn't or can't be unlocked during
probing but it somehow ends up unlocked afterwards.  In the following
thread, the problem can be reliably reproduced after resuming from
STR.  The BIOS turns on HPA during boot but forgets to do it during
resume.

  http://thread.gmane.org/gmane.linux.kernel/858310

This patch updates libata revalidation such that it considers native
n_sectors.  If the device size has increased to match native
n_sectors, it's assumed that HPA has been unlocked involuntarily and
the device is recognized as the same one.  This should be fairly safe
while nicely working around the problem.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Christof Warlich <christof@warlich.name>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 30 +++++++++++++++++++++++-------
 include/linux/libata.h    |  1 +
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 2c6aedaef718..8ac98ff16d7d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1515,6 +1515,7 @@ static int ata_hpa_resize(struct ata_device *dev)
 
 		return rc;
 	}
+	dev->n_native_sectors = native_sectors;
 
 	/* nothing to do? */
 	if (native_sectors <= sectors || !ata_ignore_hpa) {
@@ -4099,6 +4100,7 @@ int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class,
 		       unsigned int readid_flags)
 {
 	u64 n_sectors = dev->n_sectors;
+	u64 n_native_sectors = dev->n_native_sectors;
 	int rc;
 
 	if (!ata_dev_enabled(dev))
@@ -4128,16 +4130,30 @@ int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class,
 	/* verify n_sectors hasn't changed */
 	if (dev->class == ATA_DEV_ATA && n_sectors &&
 	    dev->n_sectors != n_sectors) {
-		ata_dev_printk(dev, KERN_INFO, "n_sectors mismatch "
+		ata_dev_printk(dev, KERN_WARNING, "n_sectors mismatch "
 			       "%llu != %llu\n",
 			       (unsigned long long)n_sectors,
 			       (unsigned long long)dev->n_sectors);
-
-		/* restore original n_sectors */
-		dev->n_sectors = n_sectors;
-
-		rc = -ENODEV;
-		goto fail;
+		/*
+		 * Something could have caused HPA to be unlocked
+		 * involuntarily.  If n_native_sectors hasn't changed
+		 * and the new size matches it, keep the device.
+		 */
+		if (dev->n_native_sectors == n_native_sectors &&
+		    dev->n_sectors > n_sectors &&
+		    dev->n_sectors == n_native_sectors) {
+			ata_dev_printk(dev, KERN_WARNING,
+				       "new n_sectors matches native, probably "
+				       "late HPA unlock, continuing\n");
+			/* keep using the old n_sectors */
+			dev->n_sectors = n_sectors;
+		} else {
+			/* restore original n_[native]_sectors and fail */
+			dev->n_native_sectors = n_native_sectors;
+			dev->n_sectors = n_sectors;
+			rc = -ENODEV;
+			goto fail;
+		}
 	}
 
 	return 0;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 79b6d7fd4ac2..e5b6e33c6571 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -589,6 +589,7 @@ struct ata_device {
 #endif
 	/* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */
 	u64			n_sectors;	/* size of device, if ATA */
+	u64			n_native_sectors; /* native size, if ATA */
 	unsigned int		class;		/* ATA_DEV_xxx */
 	unsigned long		unpark_deadline;
 
-- 
cgit v1.2.3


From e024e11070a0a0dc7163ce1ec2da354a638bdbed Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 24 Jun 2009 09:48:08 +1000
Subject: drm/radeon/kms: add initial colortiling support.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds new set/get tiling interfaces where the pitch
and macro/micro tiling enables can be set. Along with
a flag to decide if this object should have a surface when mapped.

The only thing we need to allocate with a mapped surface should be
the frontbuffer. Note rotate scanout shouldn't require one, and
back/depth shouldn't either, though mesa needs some fixes.

It fixes the TTM interfaces along Thomas's suggestions, and I've tested
the surface stealing code with two X servers and not seen any lockdep issues.

I've stopped tiling the fbcon frontbuffer, as I don't see there being
any advantage other than testing, I've left the testing commands in there,
just flip the fb_tiled to true in radeon_fb.c

Open: Can we integrate endian swapping in with this?

Future features:
texture tiling - need to relocate texture registers TXOFFSET* with tiling info.

This also merges Michel's cleanup surfaces regs at init time patch
even though it makes sense on its own, this patch really relies on it.

Some PowerMac firmwares set up a tiling surface at the beginning of VRAM
which messes us up otherwise.
that patch is:
Signed-off-by: Michel Dänzer <daenzer@vmware.com>

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/atombios_crtc.c      |  11 ++-
 drivers/gpu/drm/radeon/r100.c               |  79 ++++++++++++++++-
 drivers/gpu/drm/radeon/r300.c               |  51 ++++++++++-
 drivers/gpu/drm/radeon/r300_reg.h           |   4 +-
 drivers/gpu/drm/radeon/radeon.h             |  32 ++++++-
 drivers/gpu/drm/radeon/radeon_asic.h        |  19 ++++
 drivers/gpu/drm/radeon/radeon_device.c      |   2 +
 drivers/gpu/drm/radeon/radeon_fb.c          |  12 ++-
 drivers/gpu/drm/radeon/radeon_gem.c         |  41 +++++++++
 drivers/gpu/drm/radeon/radeon_kms.c         |   2 +
 drivers/gpu/drm/radeon/radeon_legacy_crtc.c |  15 ++--
 drivers/gpu/drm/radeon/radeon_object.c      | 130 ++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_ttm.c         |   2 +
 drivers/gpu/drm/ttm/ttm_bo.c                |   5 +-
 drivers/gpu/drm/ttm/ttm_bo_vm.c             |   3 +
 include/drm/radeon_drm.h                    |  23 ++++-
 include/drm/ttm/ttm_bo_driver.h             |  15 ++++
 17 files changed, 427 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e64a199b5ee1..eac26cdb5dae 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -327,7 +327,7 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 	struct drm_gem_object *obj;
 	struct drm_radeon_gem_object *obj_priv;
 	uint64_t fb_location;
-	uint32_t fb_format, fb_pitch_pixels;
+	uint32_t fb_format, fb_pitch_pixels, tiling_flags;
 
 	if (!crtc->fb)
 		return -EINVAL;
@@ -364,7 +364,14 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 		return -EINVAL;
 	}
 
-	/* TODO tiling */
+	radeon_object_get_tiling_flags(obj->driver_private,
+				       &tiling_flags, NULL);
+	if (tiling_flags & RADEON_TILING_MACRO)
+		fb_format |= AVIVO_D1GRPH_MACRO_ADDRESS_MODE;
+
+	if (tiling_flags & RADEON_TILING_MICRO)
+		fb_format |= AVIVO_D1GRPH_TILED;
+
 	if (radeon_crtc->crtc_id == 0)
 		WREG32(AVIVO_D1VGA_CONTROL, 0);
 	else
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 0d05909f03f6..69bd7cb59972 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -909,6 +909,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 	unsigned idx;
 	bool onereg;
 	int r;
+	u32 tile_flags = 0;
 
 	ib = p->ib->ptr;
 	ib_chunk = &p->chunks[p->chunk_ib_idx];
@@ -942,7 +943,20 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			}
 			tmp = ib_chunk->kdata[idx] & 0x003fffff;
 			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= RADEON_DST_TILE_MACRO;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+				if (reg == RADEON_SRC_PITCH_OFFSET) {
+					DRM_ERROR("Cannot src blit from microtiled surface\n");
+					r100_cs_dump_packet(p, pkt);
+					return -EINVAL;
+				}
+				tile_flags |= RADEON_DST_TILE_MICRO;
+			}
+
+			tmp |= tile_flags;
+			ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
 			break;
 		case RADEON_RB3D_DEPTHOFFSET:
 		case RADEON_RB3D_COLOROFFSET:
@@ -987,6 +1001,25 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			}
 			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
 			break;
+		case R300_RB3D_COLORPITCH0:
+		case RADEON_RB3D_COLORPITCH:
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+				tile_flags |= RADEON_COLOR_TILE_ENABLE;
+			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
+
+			tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+			tmp |= tile_flags;
+			ib[idx] = tmp;
+			break;
 		default:
 			/* FIXME: we don't want to allow anyothers packet */
 			break;
@@ -1707,3 +1740,47 @@ int r100_debugfs_mc_info_init(struct radeon_device *rdev)
 	return 0;
 #endif
 }
+
+int r100_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size)
+{
+	int surf_index = reg * 16;
+	int flags = 0;
+
+	/* r100/r200 divide by 16 */
+	if (rdev->family < CHIP_R300)
+		flags = pitch / 16;
+	else
+		flags = pitch / 8;
+
+	if (rdev->family <= CHIP_RS200) {
+		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
+				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
+			flags |= RADEON_SURF_TILE_COLOR_BOTH;
+		if (tiling_flags & RADEON_TILING_MACRO)
+			flags |= RADEON_SURF_TILE_COLOR_MACRO;
+	} else if (rdev->family <= CHIP_RV280) {
+		if (tiling_flags & (RADEON_TILING_MACRO))
+			flags |= R200_SURF_TILE_COLOR_MACRO;
+		if (tiling_flags & RADEON_TILING_MICRO)
+			flags |= R200_SURF_TILE_COLOR_MICRO;
+	} else {
+		if (tiling_flags & RADEON_TILING_MACRO)
+			flags |= R300_SURF_TILE_MACRO;
+		if (tiling_flags & RADEON_TILING_MICRO)
+			flags |= R300_SURF_TILE_MICRO;
+	}
+
+	DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
+	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
+	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
+	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
+	return 0;
+}
+
+void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+	int surf_index = reg * 16;
+	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
+}
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 0e0e094da503..28e5777658be 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -30,6 +30,7 @@
 #include "drm.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "radeon_drm.h"
 
 /* r300,r350,rv350,rv370,rv380 depends on : */
 void r100_hdp_reset(struct radeon_device *rdev);
@@ -1023,7 +1024,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 	struct radeon_cs_reloc *reloc;
 	struct r300_cs_track *track;
 	volatile uint32_t *ib;
-	uint32_t tmp;
+	uint32_t tmp, tile_flags = 0;
 	unsigned i;
 	int r;
 
@@ -1052,7 +1053,19 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		}
 		tmp = ib_chunk->kdata[idx] & 0x003fffff;
 		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-		ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= RADEON_DST_TILE_MACRO;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+			if (reg == RADEON_SRC_PITCH_OFFSET) {
+				DRM_ERROR("Cannot src blit from microtiled surface\n");
+				r100_cs_dump_packet(p, pkt);
+				return -EINVAL;
+			}
+			tile_flags |= RADEON_DST_TILE_MICRO;
+		}
+		tmp |= tile_flags;
+		ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
 		break;
 	case R300_RB3D_COLOROFFSET0:
 	case R300_RB3D_COLOROFFSET1:
@@ -1141,6 +1154,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		/* RB3D_COLORPITCH1 */
 		/* RB3D_COLORPITCH2 */
 		/* RB3D_COLORPITCH3 */
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+				  idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= R300_COLOR_TILE_ENABLE;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			tile_flags |= R300_COLOR_MICROTILE_ENABLE;
+
+		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+		tmp |= tile_flags;
+		ib[idx] = tmp;
+
 		i = (reg - 0x4E38) >> 2;
 		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
 		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
@@ -1196,6 +1226,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		break;
 	case 0x4F24:
 		/* ZB_DEPTHPITCH */
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+				  idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			tile_flags |= R300_DEPTHMICROTILE_TILED;;
+
+		tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
+		tmp |= tile_flags;
+		ib[idx] = tmp;
+
 		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
 		break;
 	case 0x4104:
diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h
index 70f48609515e..4b7afef35a65 100644
--- a/drivers/gpu/drm/radeon/r300_reg.h
+++ b/drivers/gpu/drm/radeon/r300_reg.h
@@ -27,7 +27,9 @@
 #ifndef _R300_REG_H_
 #define _R300_REG_H_
 
-
+#define R300_SURF_TILE_MACRO (1<<16)
+#define R300_SURF_TILE_MICRO (2<<16)
+#define R300_SURF_TILE_BOTH (3<<16)
 
 
 #define R300_MC_INIT_MISC_LAT_TIMER	0x180
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7f007185e7f7..af12a2fe3221 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -201,6 +201,14 @@ int radeon_fence_wait_last(struct radeon_device *rdev);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 
+/*
+ * Tiling registers
+ */
+struct radeon_surface_reg {
+	struct radeon_object *robj;
+};
+
+#define RADEON_GEM_MAX_SURFACES 8
 
 /*
  * Radeon buffer.
@@ -213,6 +221,7 @@ struct radeon_object_list {
 	uint64_t		gpu_offset;
 	unsigned		rdomain;
 	unsigned		wdomain;
+	uint32_t                tiling_flags;
 };
 
 int radeon_object_init(struct radeon_device *rdev);
@@ -242,8 +251,15 @@ void radeon_object_list_clean(struct list_head *head);
 int radeon_object_fbdev_mmap(struct radeon_object *robj,
 			     struct vm_area_struct *vma);
 unsigned long radeon_object_size(struct radeon_object *robj);
-
-
+void radeon_object_clear_surface_reg(struct radeon_object *robj);
+int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
+			       bool force_drop);
+void radeon_object_set_tiling_flags(struct radeon_object *robj,
+				    uint32_t tiling_flags, uint32_t pitch);
+void radeon_object_get_tiling_flags(struct radeon_object *robj, uint32_t *tiling_flags, uint32_t *pitch);
+void radeon_bo_move_notify(struct ttm_buffer_object *bo,
+			   struct ttm_mem_reg *mem);
+void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 /*
  * GEM objects.
  */
@@ -535,6 +551,11 @@ struct radeon_asic {
 	void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
 	void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
 	void (*set_clock_gating)(struct radeon_device *rdev, int enable);
+
+	int (*set_surface_reg)(struct radeon_device *rdev, int reg,
+			       uint32_t tiling_flags, uint32_t pitch,
+			       uint32_t offset, uint32_t obj_size);
+	int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
 };
 
 union radeon_asic_config {
@@ -568,6 +589,10 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp);
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
+int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
 
 
 /*
@@ -627,6 +652,7 @@ struct radeon_device {
 	bool				shutdown;
 	bool				suspend;
 	bool				need_dma32;
+	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -801,5 +827,7 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 #define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e))
 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l))
 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e))
+#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
+#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r)))
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index e2e567395df8..dd903d329406 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -71,6 +71,10 @@ int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t dst_offset,
 		   unsigned num_pages,
 		   struct radeon_fence *fence);
+int r100_set_surface_reg(struct radeon_device *rdev, int reg,
+			 uint32_t tiling_flags, uint32_t pitch,
+			 uint32_t offset, uint32_t obj_size);
+int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 
 static struct radeon_asic r100_asic = {
 	.init = &r100_init,
@@ -100,6 +104,8 @@ static struct radeon_asic r100_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 
@@ -128,6 +134,7 @@ int r300_copy_dma(struct radeon_device *rdev,
 		  uint64_t dst_offset,
 		  unsigned num_pages,
 		  struct radeon_fence *fence);
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.errata = &r300_errata,
@@ -156,6 +163,8 @@ static struct radeon_asic r300_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 /*
@@ -193,6 +202,8 @@ static struct radeon_asic r420_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 
@@ -237,6 +248,8 @@ static struct radeon_asic rs400_asic = {
 	.set_memory_clock = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 
@@ -322,6 +335,8 @@ static struct radeon_asic rs690_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 
@@ -367,6 +382,8 @@ static struct radeon_asic rv515_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 
@@ -405,6 +422,8 @@ static struct radeon_asic r520_asic = {
 	.set_memory_clock = &radeon_atom_set_memory_clock,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index cdef6eb01baf..f23083bbba3f 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -48,6 +48,8 @@ static void radeon_surface_init(struct radeon_device *rdev)
 			       i * (RADEON_SURFACE1_INFO - RADEON_SURFACE0_INFO),
 			       0);
 		}
+		/* enable surfaces */
+		WREG32(RADEON_SURFACE_CNTL, 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 260870a29d83..36d2f5588f20 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -471,10 +471,10 @@ static struct notifier_block paniced = {
 	.notifier_call = radeonfb_panic,
 };
 
-static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp)
+static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled)
 {
 	int aligned = width;
-	int align_large = (ASIC_IS_AVIVO(rdev));
+	int align_large = (ASIC_IS_AVIVO(rdev)) || tiled;
 	int pitch_mask = 0;
 
 	switch (bpp / 8) {
@@ -512,12 +512,13 @@ int radeonfb_create(struct radeon_device *rdev,
 	u64 fb_gpuaddr;
 	void *fbptr = NULL;
 	unsigned long tmp;
+	bool fb_tiled = false; /* useful for testing */
 
 	mode_cmd.width = surface_width;
 	mode_cmd.height = surface_height;
 	mode_cmd.bpp = 32;
 	/* need to align pitch with crtc limits */
-	mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8);
+	mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8);
 	mode_cmd.depth = 24;
 
 	size = mode_cmd.pitch * mode_cmd.height;
@@ -535,6 +536,8 @@ int radeonfb_create(struct radeon_device *rdev,
 	}
 	robj = gobj->driver_private;
 
+	if (fb_tiled)
+		radeon_object_set_tiling_flags(robj, RADEON_TILING_MACRO|RADEON_TILING_SURFACE, mode_cmd.pitch);
 	mutex_lock(&rdev->ddev->struct_mutex);
 	fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj);
 	if (fb == NULL) {
@@ -563,6 +566,9 @@ int radeonfb_create(struct radeon_device *rdev,
 	}
 	rfbdev = info->par;
 
+	if (fb_tiled)
+		radeon_object_check_tiling(robj, 0, 0);
+
 	ret = radeon_object_kmap(robj, &fbptr);
 	if (ret) {
 		goto out_unref;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index eb516034235d..12542087b298 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -285,3 +285,44 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&dev->struct_mutex);
 	return r;
 }
+
+int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	struct drm_radeon_gem_set_tiling *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r = 0;
+
+	DRM_DEBUG("%d \n", args->handle);
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL)
+		return -EINVAL;
+	robj = gobj->driver_private;
+	radeon_object_set_tiling_flags(robj, args->tiling_flags, args->pitch);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
+
+int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	struct drm_radeon_gem_get_tiling *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r = 0;
+
+	DRM_DEBUG("\n");
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL)
+		return -EINVAL;
+	robj = gobj->driver_private;
+	radeon_object_get_tiling_flags(robj, &args->tiling_flags,
+				       &args->pitch);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 4612a7c146d1..937a2f1cdb46 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -291,5 +291,7 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 14c1a5107fc9..0613790e2a5b 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -235,6 +235,7 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 	uint64_t base;
 	uint32_t crtc_offset, crtc_offset_cntl, crtc_tile_x0_y0 = 0;
 	uint32_t crtc_pitch, pitch_pixels;
+	uint32_t tiling_flags;
 
 	DRM_DEBUG("\n");
 
@@ -258,8 +259,12 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 		       (crtc->fb->bits_per_pixel * 8));
 	crtc_pitch |= crtc_pitch << 16;
 
-	/* TODO tiling */
-	if (0) {
+	radeon_object_get_tiling_flags(obj->driver_private,
+				       &tiling_flags, NULL);
+	if (tiling_flags & RADEON_TILING_MICRO)
+		DRM_ERROR("trying to scanout microtiled buffer\n");
+
+	if (tiling_flags & RADEON_TILING_MACRO) {
 		if (ASIC_IS_R300(rdev))
 			crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
 					     R300_CRTC_MICRO_TILE_BUFFER_DIS |
@@ -275,15 +280,13 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 			crtc_offset_cntl &= ~RADEON_CRTC_TILE_EN;
 	}
 
-
-	/* TODO more tiling */
-	if (0) {
+	if (tiling_flags & RADEON_TILING_MACRO) {
 		if (ASIC_IS_R300(rdev)) {
 			crtc_tile_x0_y0 = x | (y << 16);
 			base &= ~0x7ff;
 		} else {
 			int byteshift = crtc->fb->bits_per_pixel >> 4;
-			int tile_addr = (((y >> 3) * crtc->fb->width + x) >> (8 - byteshift)) << 11;
+			int tile_addr = (((y >> 3) * pitch_pixels +  x) >> (8 - byteshift)) << 11;
 			base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8);
 			crtc_offset_cntl |= (y % 16);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bac0d06c52ac..d5b1fd562d88 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -44,6 +44,9 @@ struct radeon_object {
 	uint64_t			gpu_addr;
 	void				*kptr;
 	bool				is_iomem;
+	uint32_t			tiling_flags;
+	uint32_t			pitch;
+	int				surface_reg;
 };
 
 int radeon_ttm_init(struct radeon_device *rdev);
@@ -70,6 +73,7 @@ static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj)
 
 	robj = container_of(tobj, struct radeon_object, tobj);
 	list_del_init(&robj->list);
+	radeon_object_clear_surface_reg(robj);
 	kfree(robj);
 }
 
@@ -141,6 +145,7 @@ int radeon_object_create(struct radeon_device *rdev,
 	}
 	robj->rdev = rdev;
 	robj->gobj = gobj;
+	robj->surface_reg = -1;
 	INIT_LIST_HEAD(&robj->list);
 
 	flags = radeon_object_flags_from_domain(domain);
@@ -435,6 +440,7 @@ int radeon_object_list_validate(struct list_head *head, void *fence)
 			radeon_object_gpu_addr(robj);
 		}
 		lobj->gpu_offset = robj->gpu_addr;
+		lobj->tiling_flags = robj->tiling_flags;
 		if (fence) {
 			old_fence = (struct radeon_fence *)robj->tobj.sync_obj;
 			robj->tobj.sync_obj = radeon_fence_ref(fence);
@@ -479,3 +485,127 @@ unsigned long radeon_object_size(struct radeon_object *robj)
 {
 	return robj->tobj.num_pages << PAGE_SHIFT;
 }
+
+int radeon_object_get_surface_reg(struct radeon_object *robj)
+{
+	struct radeon_device *rdev = robj->rdev;
+	struct radeon_surface_reg *reg;
+	struct radeon_object *old_object;
+	int steal;
+	int i;
+
+	if (!robj->tiling_flags)
+		return 0;
+
+	if (robj->surface_reg >= 0) {
+		reg = &rdev->surface_regs[robj->surface_reg];
+		i = robj->surface_reg;
+		goto out;
+	}
+
+	steal = -1;
+	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
+
+		reg = &rdev->surface_regs[i];
+		if (!reg->robj)
+			break;
+
+		old_object = reg->robj;
+		if (old_object->pin_count == 0)
+			steal = i;
+	}
+
+	/* if we are all out */
+	if (i == RADEON_GEM_MAX_SURFACES) {
+		if (steal == -1)
+			return -ENOMEM;
+		/* find someone with a surface reg and nuke their BO */
+		reg = &rdev->surface_regs[steal];
+		old_object = reg->robj;
+		/* blow away the mapping */
+		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
+		ttm_bo_unmap_virtual(&old_object->tobj);
+		old_object->surface_reg = -1;
+		i = steal;
+	}
+
+	robj->surface_reg = i;
+	reg->robj = robj;
+
+out:
+	radeon_set_surface_reg(rdev, i, robj->tiling_flags, robj->pitch,
+			       robj->tobj.mem.mm_node->start << PAGE_SHIFT,
+			       robj->tobj.num_pages << PAGE_SHIFT);
+	return 0;
+}
+
+void radeon_object_clear_surface_reg(struct radeon_object *robj)
+{
+	struct radeon_device *rdev = robj->rdev;
+	struct radeon_surface_reg *reg;
+
+	if (robj->surface_reg == -1)
+		return;
+
+	reg = &rdev->surface_regs[robj->surface_reg];
+	radeon_clear_surface_reg(rdev, robj->surface_reg);
+
+	reg->robj = NULL;
+	robj->surface_reg = -1;
+}
+
+void radeon_object_set_tiling_flags(struct radeon_object *robj,
+				    uint32_t tiling_flags, uint32_t pitch)
+{
+	robj->tiling_flags = tiling_flags;
+	robj->pitch = pitch;
+}
+
+void radeon_object_get_tiling_flags(struct radeon_object *robj,
+				    uint32_t *tiling_flags,
+				    uint32_t *pitch)
+{
+	if (tiling_flags)
+		*tiling_flags = robj->tiling_flags;
+	if (pitch)
+		*pitch = robj->pitch;
+}
+
+int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
+			       bool force_drop)
+{
+	if (!(robj->tiling_flags & RADEON_TILING_SURFACE))
+		return 0;
+
+	if (force_drop) {
+		radeon_object_clear_surface_reg(robj);
+		return 0;
+	}
+
+	if (robj->tobj.mem.mem_type != TTM_PL_VRAM) {
+		if (!has_moved)
+			return 0;
+
+		if (robj->surface_reg >= 0)
+			radeon_object_clear_surface_reg(robj);
+		return 0;
+	}
+
+	if ((robj->surface_reg >= 0) && !has_moved)
+		return 0;
+
+	return radeon_object_get_surface_reg(robj);
+}
+
+void radeon_bo_move_notify(struct ttm_buffer_object *bo,
+			  struct ttm_mem_reg *mem)
+{
+	struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
+	radeon_object_check_tiling(robj, 0, 1);
+}
+
+void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+	struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
+	radeon_object_check_tiling(robj, 0, 0);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 4ca9aa9203d0..37e1cbcce3a9 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -429,6 +429,8 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.sync_obj_flush = &radeon_sync_obj_flush,
 	.sync_obj_unref = &radeon_sync_obj_unref,
 	.sync_obj_ref = &radeon_sync_obj_ref,
+	.move_notify = &radeon_bo_move_notify,
+	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 };
 
 int radeon_ttm_init(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e55e7972c897..6538d4236989 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -43,7 +43,6 @@
 #define TTM_BO_HASH_ORDER 13
 
 static int ttm_bo_setup_vm(struct ttm_buffer_object *bo);
-static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
 
 static inline uint32_t ttm_bo_type_flags(unsigned type)
@@ -307,6 +306,9 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	}
 
+	if (bdev->driver->move_notify)
+		bdev->driver->move_notify(bo, mem);
+
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
 		ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
@@ -1451,6 +1453,7 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
 	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
 }
+EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
 static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 40b75032ea47..41c907f6c560 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -101,6 +101,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_NOPAGE;
 	}
 
+	if (bdev->driver->fault_reserve_notify)
+		bdev->driver->fault_reserve_notify(bo);
+
 	/*
 	 * Wait for buffer data in transit, due to a pipelined
 	 * move.
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 41862e9a4c20..af4b4826997e 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -506,6 +506,8 @@ typedef struct {
 #define DRM_RADEON_GEM_WAIT_IDLE	0x24
 #define DRM_RADEON_CS			0x26
 #define DRM_RADEON_INFO			0x27
+#define DRM_RADEON_GEM_SET_TILING	0x28
+#define DRM_RADEON_GEM_GET_TILING	0x29
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -544,7 +546,8 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
 #define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
 #define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-
+#define DRM_IOCTL_RADEON_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -796,6 +799,24 @@ struct drm_radeon_gem_create {
 	uint32_t	flags;
 };
 
+#define RADEON_TILING_MACRO 0x1
+#define RADEON_TILING_MICRO 0x2
+#define RADEON_TILING_SWAP  0x4
+#define RADEON_TILING_SURFACE  0x8 /* this object requires a surface
+				    * when mapped - i.e. front buffer */
+
+struct drm_radeon_gem_set_tiling {
+	uint32_t	handle;
+	uint32_t	tiling_flags;
+	uint32_t	pitch;
+};
+
+struct drm_radeon_gem_get_tiling {
+	uint32_t	handle;
+	uint32_t	tiling_flags;
+	uint32_t	pitch;
+};
+
 struct drm_radeon_gem_mmap {
 	uint32_t	handle;
 	uint32_t	pad;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ea83dd23a4d7..a68829db381a 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -354,6 +354,14 @@ struct ttm_bo_driver {
 	int (*sync_obj_flush) (void *sync_obj, void *sync_arg);
 	void (*sync_obj_unref) (void **sync_obj);
 	void *(*sync_obj_ref) (void *sync_obj);
+
+	/* hook to notify driver about a driver move so it
+	 * can do tiling things */
+	void (*move_notify)(struct ttm_buffer_object *bo,
+			    struct ttm_mem_reg *new_mem);
+	/* notify the driver we are taking a fault on this BO
+	 * and have reserved it */
+	void (*fault_reserve_notify)(struct ttm_buffer_object *bo);
 };
 
 #define TTM_NUM_MEM_TYPES 8
@@ -653,6 +661,13 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
 			      struct ttm_bo_driver *driver,
 			      uint64_t file_page_offset, bool need_dma32);
 
+/**
+ * ttm_bo_unmap_virtual
+ *
+ * @bo: tear down the virtual mappings for this BO
+ */
+extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
+
 /**
  * ttm_bo_reserve:
  *
-- 
cgit v1.2.3


From dddac6a7b445de95515f64fdf82fe5dc36c02f26 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 29 Jul 2009 21:07:55 +0200
Subject: PM / Hibernate: Replace bdget call with simple atomic_inc of i_count

Create bdgrab().  This function copies an existing reference to a
block_device.  It is safe to call from any context.

Hibernation code wishes to copy a reference to the active swap device.
Right now it calls bdget() under a spinlock, but this is wrong because
bdget() can sleep.  It doesn't need a full bdget() because we already
hold a reference to active swap devices (and the spinlock protects
against swapoff).

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=13827

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 fs/block_dev.c     | 10 ++++++++++
 include/linux/fs.h |  1 +
 mm/swapfile.c      |  4 ++--
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3a6d4fb2a329..94dfda24c06e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -564,6 +564,16 @@ struct block_device *bdget(dev_t dev)
 
 EXPORT_SYMBOL(bdget);
 
+/**
+ * bdgrab -- Grab a reference to an already referenced block device
+ * @bdev:	Block device to grab a reference to.
+ */
+struct block_device *bdgrab(struct block_device *bdev)
+{
+	atomic_inc(&bdev->bd_inode->i_count);
+	return bdev;
+}
+
 long nr_blockdev_pages(void)
 {
 	struct block_device *bdev;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0872372184fe..a36ffa5a77a4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1946,6 +1946,7 @@ extern void putname(const char *name);
 extern int register_blkdev(unsigned int, const char *);
 extern void unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
+extern struct block_device *bdgrab(struct block_device *bdev);
 extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d1ade1a48ee7..8ffdc0d23c53 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 
 		if (!bdev) {
 			if (bdev_p)
-				*bdev_p = bdget(sis->bdev->bd_dev);
+				*bdev_p = bdgrab(sis->bdev);
 
 			spin_unlock(&swap_lock);
 			return i;
@@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
 					struct swap_extent, list);
 			if (se->start_block == offset) {
 				if (bdev_p)
-					*bdev_p = bdget(sis->bdev->bd_dev);
+					*bdev_p = bdgrab(sis->bdev);
 
 				spin_unlock(&swap_lock);
 				bdput(bdev);
-- 
cgit v1.2.3


From e043e42bdb66885b3ac10d27a01ccb9972e2b0a3 Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Wed, 29 Jul 2009 12:15:56 -0700
Subject: pty: avoid forcing 'low_latency' tty flag

We really don't want to mark the pty as a low-latency device, because as
Alan points out, the ->write method can be called from an IRQ (ppp?),
and that means we can't use ->low_latency=1 as we take mutexes in the
low_latency case.

So rather than using low_latency to force the written data to be pushed
to the ldisc handling at 'write()' time, just make the reader side (or
the poll function) do the flush when it checks whether there is data to
be had.

This also fixes the problem with lost data in an emacs compile buffer
(bugzilla 13815), and we can thus revert the low_latency pty hack
(commit 3a54297478e6578f96fd54bf4daa1751130aca86: "pty: quickfix for the
pty ENXIO timing problems").

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[ Modified to do the tty_flush_to_ldisc() inside input_available_p() so
  that it triggers for both read and poll()  - Linus]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/n_tty.c      |  1 +
 drivers/char/pty.c        |  2 --
 drivers/char/tty_buffer.c | 13 +++++++++++++
 include/linux/tty.h       |  1 +
 4 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index ff47907ff1bf..973be2f44195 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -1583,6 +1583,7 @@ static int n_tty_open(struct tty_struct *tty)
 
 static inline int input_available_p(struct tty_struct *tty, int amt)
 {
+	tty_flush_to_ldisc(tty);
 	if (tty->icanon) {
 		if (tty->canon_data)
 			return 1;
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 3850a68f265a..6e6942c45f5b 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -52,7 +52,6 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 		return;
 	tty->link->packet = 0;
 	set_bit(TTY_OTHER_CLOSED, &tty->link->flags);
-	tty_flip_buffer_push(tty->link);
 	wake_up_interruptible(&tty->link->read_wait);
 	wake_up_interruptible(&tty->link->write_wait);
 	if (tty->driver->subtype == PTY_TYPE_MASTER) {
@@ -208,7 +207,6 @@ static int pty_open(struct tty_struct *tty, struct file *filp)
 	clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
 	set_bit(TTY_THROTTLED, &tty->flags);
 	retval = 0;
-	tty->low_latency = 1;
 out:
 	return retval;
 }
diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c
index 810ee25d66a4..3108991c5c8b 100644
--- a/drivers/char/tty_buffer.c
+++ b/drivers/char/tty_buffer.c
@@ -461,6 +461,19 @@ static void flush_to_ldisc(struct work_struct *work)
 	tty_ldisc_deref(disc);
 }
 
+/**
+ *	tty_flush_to_ldisc
+ *	@tty: tty to push
+ *
+ *	Push the terminal flip buffers to the line discipline.
+ *
+ *	Must not be called from IRQ context.
+ */
+void tty_flush_to_ldisc(struct tty_struct *tty)
+{
+	flush_to_ldisc(&tty->buf.work.work);
+}
+
 /**
  *	tty_flip_buffer_push	-	terminal
  *	@tty: tty to push
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1488d8c81aac..e8c6c9136c97 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -394,6 +394,7 @@ extern void __do_SAK(struct tty_struct *tty);
 extern void disassociate_ctty(int priv);
 extern void no_tty(void);
 extern void tty_flip_buffer_push(struct tty_struct *tty);
+extern void tty_flush_to_ldisc(struct tty_struct *tty);
 extern void tty_buffer_free_all(struct tty_struct *tty);
 extern void tty_buffer_flush(struct tty_struct *tty);
 extern void tty_buffer_init(struct tty_struct *tty);
-- 
cgit v1.2.3


From 096b7fe012d66ed55e98bc8022405ede0cc80e96 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 29 Jul 2009 15:04:04 -0700
Subject: cgroups: fix pid namespace bug

The bug was introduced by commit cc31edceee04a7b87f2be48f9489ebb72d264844
("cgroups: convert tasks file to use a seq_file with shared pid array").

We cache a pid array for all threads that are opening the same "tasks"
file, but the pids in the array are always from the namespace of the
last process that opened the file, so all other threads will read pids
from that namespace instead of their own namespaces.

To fix it, we maintain a list of pid arrays, which is keyed by pid_ns.
The list will be of length 1 at most time.

Reported-by: Paul Menage <menage@google.com>
Idea-by: Paul Menage <menage@google.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 11 +++---
 kernel/cgroup.c        | 96 +++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 76 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 665fa70e4094..20411d2876f8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -179,14 +179,11 @@ struct cgroup {
 	 */
 	struct list_head release_list;
 
-	/* pids_mutex protects the fields below */
+	/* pids_mutex protects pids_list and cached pid arrays. */
 	struct rw_semaphore pids_mutex;
-	/* Array of process ids in the cgroup */
-	pid_t *tasks_pids;
-	/* How many files are using the current tasks_pids array */
-	int pids_use_count;
-	/* Length of the current tasks_pids array */
-	int pids_length;
+
+	/* Linked list of struct cgroup_pids */
+	struct list_head pids_list;
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3737a682cdf5..250dac05680f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -47,6 +47,7 @@
 #include <linux/hash.h>
 #include <linux/namei.h>
 #include <linux/smp_lock.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/atomic.h>
 
@@ -960,6 +961,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->children);
 	INIT_LIST_HEAD(&cgrp->css_sets);
 	INIT_LIST_HEAD(&cgrp->release_list);
+	INIT_LIST_HEAD(&cgrp->pids_list);
 	init_rwsem(&cgrp->pids_mutex);
 }
 static void init_cgroup_root(struct cgroupfs_root *root)
@@ -2201,12 +2203,30 @@ err:
 	return ret;
 }
 
+/*
+ * Cache pids for all threads in the same pid namespace that are
+ * opening the same "tasks" file.
+ */
+struct cgroup_pids {
+	/* The node in cgrp->pids_list */
+	struct list_head list;
+	/* The cgroup those pids belong to */
+	struct cgroup *cgrp;
+	/* The namepsace those pids belong to */
+	struct pid_namespace *ns;
+	/* Array of process ids in the cgroup */
+	pid_t *tasks_pids;
+	/* How many files are using the this tasks_pids array */
+	int use_count;
+	/* Length of the current tasks_pids array */
+	int length;
+};
+
 static int cmppid(const void *a, const void *b)
 {
 	return *(pid_t *)a - *(pid_t *)b;
 }
 
-
 /*
  * seq_file methods for the "tasks" file. The seq_file position is the
  * next pid to display; the seq_file iterator is a pointer to the pid
@@ -2221,45 +2241,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 	 * after a seek to the start). Use a binary-search to find the
 	 * next pid to display, if any
 	 */
-	struct cgroup *cgrp = s->private;
+	struct cgroup_pids *cp = s->private;
+	struct cgroup *cgrp = cp->cgrp;
 	int index = 0, pid = *pos;
 	int *iter;
 
 	down_read(&cgrp->pids_mutex);
 	if (pid) {
-		int end = cgrp->pids_length;
+		int end = cp->length;
 
 		while (index < end) {
 			int mid = (index + end) / 2;
-			if (cgrp->tasks_pids[mid] == pid) {
+			if (cp->tasks_pids[mid] == pid) {
 				index = mid;
 				break;
-			} else if (cgrp->tasks_pids[mid] <= pid)
+			} else if (cp->tasks_pids[mid] <= pid)
 				index = mid + 1;
 			else
 				end = mid;
 		}
 	}
 	/* If we're off the end of the array, we're done */
-	if (index >= cgrp->pids_length)
+	if (index >= cp->length)
 		return NULL;
 	/* Update the abstract position to be the actual pid that we found */
-	iter = cgrp->tasks_pids + index;
+	iter = cp->tasks_pids + index;
 	*pos = *iter;
 	return iter;
 }
 
 static void cgroup_tasks_stop(struct seq_file *s, void *v)
 {
-	struct cgroup *cgrp = s->private;
+	struct cgroup_pids *cp = s->private;
+	struct cgroup *cgrp = cp->cgrp;
 	up_read(&cgrp->pids_mutex);
 }
 
 static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct cgroup *cgrp = s->private;
+	struct cgroup_pids *cp = s->private;
 	int *p = v;
-	int *end = cgrp->tasks_pids + cgrp->pids_length;
+	int *end = cp->tasks_pids + cp->length;
 
 	/*
 	 * Advance to the next pid in the array. If this goes off the
@@ -2286,26 +2308,33 @@ static struct seq_operations cgroup_tasks_seq_operations = {
 	.show = cgroup_tasks_show,
 };
 
-static void release_cgroup_pid_array(struct cgroup *cgrp)
+static void release_cgroup_pid_array(struct cgroup_pids *cp)
 {
+	struct cgroup *cgrp = cp->cgrp;
+
 	down_write(&cgrp->pids_mutex);
-	BUG_ON(!cgrp->pids_use_count);
-	if (!--cgrp->pids_use_count) {
-		kfree(cgrp->tasks_pids);
-		cgrp->tasks_pids = NULL;
-		cgrp->pids_length = 0;
+	BUG_ON(!cp->use_count);
+	if (!--cp->use_count) {
+		list_del(&cp->list);
+		put_pid_ns(cp->ns);
+		kfree(cp->tasks_pids);
+		kfree(cp);
 	}
 	up_write(&cgrp->pids_mutex);
 }
 
 static int cgroup_tasks_release(struct inode *inode, struct file *file)
 {
-	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct seq_file *seq;
+	struct cgroup_pids *cp;
 
 	if (!(file->f_mode & FMODE_READ))
 		return 0;
 
-	release_cgroup_pid_array(cgrp);
+	seq = file->private_data;
+	cp = seq->private;
+
+	release_cgroup_pid_array(cp);
 	return seq_release(inode, file);
 }
 
@@ -2324,6 +2353,8 @@ static struct file_operations cgroup_tasks_operations = {
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
 	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct pid_namespace *ns = current->nsproxy->pid_ns;
+	struct cgroup_pids *cp;
 	pid_t *pidarray;
 	int npids;
 	int retval;
@@ -2350,20 +2381,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
 	 * array if necessary
 	 */
 	down_write(&cgrp->pids_mutex);
-	kfree(cgrp->tasks_pids);
-	cgrp->tasks_pids = pidarray;
-	cgrp->pids_length = npids;
-	cgrp->pids_use_count++;
+
+	list_for_each_entry(cp, &cgrp->pids_list, list) {
+		if (ns == cp->ns)
+			goto found;
+	}
+
+	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+	if (!cp) {
+		up_write(&cgrp->pids_mutex);
+		kfree(pidarray);
+		return -ENOMEM;
+	}
+	cp->cgrp = cgrp;
+	cp->ns = ns;
+	get_pid_ns(ns);
+	list_add(&cp->list, &cgrp->pids_list);
+found:
+	kfree(cp->tasks_pids);
+	cp->tasks_pids = pidarray;
+	cp->length = npids;
+	cp->use_count++;
 	up_write(&cgrp->pids_mutex);
 
 	file->f_op = &cgroup_tasks_operations;
 
 	retval = seq_open(file, &cgroup_tasks_seq_operations);
 	if (retval) {
-		release_cgroup_pid_array(cgrp);
+		release_cgroup_pid_array(cp);
 		return retval;
 	}
-	((struct seq_file *)file->private_data)->private = cgrp;
+	((struct seq_file *)file->private_data)->private = cp;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 887032670d47366a8c8f25396ea7c14b7b2cc620 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 29 Jul 2009 15:04:06 -0700
Subject: cgroup avoid permanent sleep at rmdir

After commit ec64f51545fffbc4cb968f0cea56341a4b07e85a ("cgroup: fix
frequent -EBUSY at rmdir"), cgroup's rmdir (especially against memcg)
doesn't return -EBUSY by temporary ref counts.  That commit expects all
refs after pre_destroy() is temporary but...it wasn't.  Then, rmdir can
wait permanently.  This patch tries to fix that and change followings.

 - set CGRP_WAIT_ON_RMDIR flag before pre_destroy().
 - clear CGRP_WAIT_ON_RMDIR flag when the subsys finds racy case.
   if there are sleeping ones, wakes them up.
 - rmdir() sleeps only when CGRP_WAIT_ON_RMDIR flag is set.

Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reported-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: Paul Menage <menage@google.com>
Acked-by: Balbir Sigh <balbir@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 17 ++++++++++++++++
 kernel/cgroup.c        | 55 ++++++++++++++++++++++++++++++++++----------------
 mm/memcontrol.c        | 23 ++++++++++++++++++---
 3 files changed, 75 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 20411d2876f8..90bba9e62286 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -362,6 +362,23 @@ int cgroup_task_count(const struct cgroup *cgrp);
 /* Return true if cgrp is a descendant of the task's cgroup */
 int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 
+/*
+ * When the subsys has to access css and may add permanent refcnt to css,
+ * it should take care of racy conditions with rmdir(). Following set of
+ * functions, is for stop/restart rmdir if necessary.
+ * Because these will call css_get/put, "css" should be alive css.
+ *
+ *  cgroup_exclude_rmdir();
+ *  ...do some jobs which may access arbitrary empty cgroup
+ *  cgroup_release_and_wakeup_rmdir();
+ *
+ *  When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
+ *  it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
+ */
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
+
 /*
  * Control Group subsystem type.
  * See Documentation/cgroups/cgroups.txt for details
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 250dac05680f..b6eadfe30e7b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -735,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
  * reference to css->refcnt. In general, this refcnt is expected to goes down
  * to zero, soon.
  *
- * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
  */
 DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
 
-static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
 {
-	if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
 		wake_up_all(&cgroup_rmdir_waitq);
 }
 
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+	css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+	cgroup_wakeup_rmdir_waiter(css->cgroup);
+	css_put(css);
+}
+
+
 static int rebind_subsystems(struct cgroupfs_root *root,
 			      unsigned long final_bits)
 {
@@ -1359,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
 	 * is no longer empty.
 	 */
-	cgroup_wakeup_rmdir_waiters(cgrp);
+	cgroup_wakeup_rmdir_waiter(cgrp);
 	return 0;
 }
 
@@ -2743,34 +2755,43 @@ again:
 	}
 	mutex_unlock(&cgroup_mutex);
 
+	/*
+	 * In general, subsystem has no css->refcnt after pre_destroy(). But
+	 * in racy cases, subsystem may have to get css->refcnt after
+	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
+	 * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
+	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
+	 * and subsystem's reference count handling. Please see css_get/put
+	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
+	 */
+	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+
 	/*
 	 * Call pre_destroy handlers of subsys. Notify subsystems
 	 * that rmdir() request comes.
 	 */
 	ret = cgroup_call_pre_destroy(cgrp);
-	if (ret)
+	if (ret) {
+		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		return ret;
+	}
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
 	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
-	/*
-	 * css_put/get is provided for subsys to grab refcnt to css. In typical
-	 * case, subsystem has no reference after pre_destroy(). But, under
-	 * hierarchy management, some *temporal* refcnt can be hold.
-	 * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
-	 * is really busy, it should return -EBUSY at pre_destroy(). wake_up
-	 * is called when css_put() is called and refcnt goes down to 0.
-	 */
-	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
-
 	if (!cgroup_clear_css_refs(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
-		schedule();
+		/*
+		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
+		 * prepare_to_wait(), we need to check this flag.
+		 */
+		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
+			schedule();
 		finish_wait(&cgroup_rmdir_waitq, &wait);
 		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		if (signal_pending(current))
@@ -3342,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
 			set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
 		}
-		cgroup_wakeup_rmdir_waiters(cgrp);
+		cgroup_wakeup_rmdir_waiter(cgrp);
 	}
 	rcu_read_unlock();
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e717964cb5a0..fd4529d86de5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	ret = 0;
 out:
 	unlock_page_cgroup(pc);
+	/*
+	 * We charges against "to" which may not have any tasks. Then, "to"
+	 * can be under rmdir(). But in current implementation, caller of
+	 * this function is just force_empty() and it's garanteed that
+	 * "to" is never removed. So, we don't check rmdir status here.
+	 */
 	return ret;
 }
 
@@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		return;
 	if (!ptr)
 		return;
+	cgroup_exclude_rmdir(&ptr->css);
 	pc = lookup_page_cgroup(page);
 	mem_cgroup_lru_del_before_commit_swapcache(page);
 	__mem_cgroup_commit_charge(ptr, pc, ctype);
@@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 		}
 		rcu_read_unlock();
 	}
-	/* add this page(page_cgroup) to the LRU we want. */
-
+	/*
+	 * At swapin, we may charge account against cgroup which has no tasks.
+	 * So, rmdir()->pre_destroy() can be called while we do this charge.
+	 * In that case, we need to call pre_destroy() again. check it here.
+	 */
+	cgroup_release_and_wakeup_rmdir(&ptr->css);
 }
 
 void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
@@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 
 	if (!mem)
 		return;
-
+	cgroup_exclude_rmdir(&mem->css);
 	/* at migration success, oldpage->mapping is NULL. */
 	if (oldpage->mapping) {
 		target = oldpage;
@@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
 		mem_cgroup_uncharge_page(target);
+	/*
+	 * At migration, we may charge account against cgroup which has no tasks
+	 * So, rmdir()->pre_destroy() can be called while we do this charge.
+	 * In that case, we need to call pre_destroy() again. check it here.
+	 */
+	cgroup_release_and_wakeup_rmdir(&mem->css);
 }
 
 /*
-- 
cgit v1.2.3


From f5a55efa140f5e9c9dd0f398fef54f20cdb74ec9 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 29 Jul 2009 15:04:12 -0700
Subject: pps.h needs <linux/types.h>

Found with make headers_check

/usr/include/linux/pps.h:52: found __[us]{8,16,32,64} type without #include <linux/types.h>

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Rodolfo Giometti <giometti@linux.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pps.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pps.h b/include/linux/pps.h
index cfe5c7214ec6..0194ab06177b 100644
--- a/include/linux/pps.h
+++ b/include/linux/pps.h
@@ -22,6 +22,8 @@
 #ifndef _PPS_H_
 #define _PPS_H_
 
+#include <linux/types.h>
+
 #define PPS_VERSION		"5.3.6"
 #define PPS_MAX_SOURCES		16		/* should be enough... */
 
-- 
cgit v1.2.3


From 534acc057b5a08ec33fa57cdd2f5a09ef124e7f2 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Wed, 29 Jul 2009 15:04:18 -0700
Subject: lib: flexible array implementation

Once a structure goes over PAGE_SIZE*2, we see occasional allocation
failures.  Some people have chosen to switch over to things like vmalloc()
that will let them keep array-like access to such a large structures.
But, vmalloc() has plenty of downsides.

Here's an alternative.  I think it's what Andrew was suggesting here:

	http://lkml.org/lkml/2009/7/2/518

I call it a flexible array.  It does all of its work in PAGE_SIZE bits, so
never does an order>0 allocation.  The base level has
PAGE_SIZE-2*sizeof(int) bytes of storage for pointers to the second level.
 So, with a 32-bit arch, you get about 4MB (4183112 bytes) of total
storage when the objects pack nicely into a page.  It is half that on
64-bit because the pointers are twice the size.  There's a table detailing
this in the code.

There are kerneldocs for the functions, but here's an
overview:

flex_array_alloc() - dynamically allocate a base structure
flex_array_free() - free the array and all of the
		    second-level pages
flex_array_free_parts() - free the second-level pages, but
			  not the base (for static bases)
flex_array_put() - copy into the array at the given index
flex_array_get() - copy out of the array at the given index
flex_array_prealloc() - preallocate the second-level pages
			between the given indexes to
			guarantee no allocs will occur at
			put() time.

We could also potentially just pass the "element_size" into each of the
API functions instead of storing it internally.  That would get us one
more base pointer on 32-bit.

I've been testing this by running it in userspace.  The header and patch
that I've been using are here, as well as the little script I'm using to
generate the size table which goes in the kerneldocs.

	http://sr71.net/~dave/linux/flexarray/

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/flex_array.h |  47 ++++++++
 lib/Makefile               |   2 +-
 lib/flex_array.c           | 269 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/flex_array.h
 create mode 100644 lib/flex_array.c

(limited to 'include')

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
new file mode 100644
index 000000000000..23c1ec79a31b
--- /dev/null
+++ b/include/linux/flex_array.h
@@ -0,0 +1,47 @@
+#ifndef _FLEX_ARRAY_H
+#define _FLEX_ARRAY_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+
+#define FLEX_ARRAY_PART_SIZE PAGE_SIZE
+#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE
+
+struct flex_array_part;
+
+/*
+ * This is meant to replace cases where an array-like
+ * structure has gotten too big to fit into kmalloc()
+ * and the developer is getting tempted to use
+ * vmalloc().
+ */
+
+struct flex_array {
+	union {
+		struct {
+			int element_size;
+			int total_nr_elements;
+			struct flex_array_part *parts[0];
+		};
+		/*
+		 * This little trick makes sure that
+		 * sizeof(flex_array) == PAGE_SIZE
+		 */
+		char padding[FLEX_ARRAY_BASE_SIZE];
+	};
+};
+
+#define FLEX_ARRAY_INIT(size, total) { { {\
+	.element_size = (size),		\
+	.total_nr_elements = (total),	\
+} } }
+
+struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags);
+int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags);
+void flex_array_free(struct flex_array *fa);
+void flex_array_free_parts(struct flex_array *fa);
+int flex_array_put(struct flex_array *fa, int element_nr, void *src,
+		gfp_t flags);
+void *flex_array_get(struct flex_array *fa, int element_nr);
+
+#endif /* _FLEX_ARRAY_H */
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf08..2e78277eff9d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 000000000000..0e7894ce8882
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,269 @@
+/*
+ * Flexible array managed in PAGE_SIZE parts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Dave Hansen <dave@linux.vnet.ibm.com>
+ */
+
+#include <linux/flex_array.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+
+struct flex_array_part {
+	char elements[FLEX_ARRAY_PART_SIZE];
+};
+
+static inline int __elements_per_part(int element_size)
+{
+	return FLEX_ARRAY_PART_SIZE / element_size;
+}
+
+static inline int bytes_left_in_base(void)
+{
+	int element_offset = offsetof(struct flex_array, parts);
+	int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset;
+	return bytes_left;
+}
+
+static inline int nr_base_part_ptrs(void)
+{
+	return bytes_left_in_base() / sizeof(struct flex_array_part *);
+}
+
+/*
+ * If a user requests an allocation which is small
+ * enough, we may simply use the space in the
+ * flex_array->parts[] array to store the user
+ * data.
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+	int data_size = fa->element_size * fa->total_nr_elements;
+	if (data_size <= bytes_left_in_base())
+		return 1;
+	return 0;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size:	the size of individual elements in the array
+ * @total:		total number of elements that this should hold
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * @total is used to size internal structures.  If the user ever
+ * accesses any array indexes >=@total, it will produce errors.
+ *
+ * The maximum number of elements is defined as: the number of
+ * elements that can be stored in a page times the number of
+ * page pointers that we can fit in the base structure or (using
+ * integer math):
+ *
+ * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
+ *
+ * Here's a table showing example capacities.  Note that the maximum
+ * index that the get/put() functions is just nr_objects-1.   This
+ * basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k |  32-bit |  64-bit |
+ * ---------------------------------|
+ *      1 bytes | 4186112 | 2093056 |
+ *      2 bytes | 2093056 | 1046528 |
+ *      3 bytes | 1395030 |  697515 |
+ *      4 bytes | 1046528 |  523264 |
+ *     32 bytes |  130816 |   65408 |
+ *     33 bytes |  126728 |   63364 |
+ *   2048 bytes |    2044 |    1022 |
+ *   2049 bytes |    1022 |     511 |
+ *       void * | 1046528 |  261632 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure.  Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags)
+{
+	struct flex_array *ret;
+	int max_size = nr_base_part_ptrs() * __elements_per_part(element_size);
+
+	/* max_size will end up 0 if element_size > PAGE_SIZE */
+	if (total > max_size)
+		return NULL;
+	ret = kzalloc(sizeof(struct flex_array), flags);
+	if (!ret)
+		return NULL;
+	ret->element_size = element_size;
+	ret->total_nr_elements = total;
+	return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa, int element_nr)
+{
+	return element_nr / __elements_per_part(fa->element_size);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ * @src:	address of data to copy into the array
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ *
+ * This is to be used in cases where the base 'struct flex_array'
+ * has been statically allocated and should not be free.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+	int part_nr;
+	int max_part = nr_base_part_ptrs();
+
+	if (elements_fit_in_base(fa))
+		return;
+	for (part_nr = 0; part_nr < max_part; part_nr++)
+		kfree(fa->parts[part_nr]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+	flex_array_free_parts(fa);
+	kfree(fa);
+}
+
+static int fa_index_inside_part(struct flex_array *fa, int element_nr)
+{
+	return element_nr % __elements_per_part(fa->element_size);
+}
+
+static int index_inside_part(struct flex_array *fa, int element_nr)
+{
+	int part_offset = fa_index_inside_part(fa, element_nr);
+	return part_offset * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+	struct flex_array_part *part = fa->parts[part_nr];
+	if (!part) {
+		/*
+		 * This leaves the part pages uninitialized
+		 * and with potentially random data, just
+		 * as if the user had kmalloc()'d the whole.
+		 * __GFP_ZERO can be used to zero it.
+		 */
+		part = kmalloc(FLEX_ARRAY_PART_SIZE, flags);
+		if (!part)
+			return NULL;
+		fa->parts[part_nr] = part;
+	}
+	return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @src:	address of data to copy into the array
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ *
+ * Note that this *copies* the contents of @src into
+ * the array.  If you are trying to store an array of
+ * pointers, make sure to pass in &ptr instead of ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else
+		part = __fa_get_part(fa, part_nr, flags);
+	if (!part)
+		return -ENOMEM;
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memcpy(dst, src, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @start:	index of first array element for which space is allocated
+ * @end:	index of last (inclusive) element for which space is allocated
+ *
+ * This will guarantee that no future calls to flex_array_put()
+ * will allocate memory.  It can be used if you are expecting to
+ * be holding a lock or in some atomic context while writing
+ * data into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags)
+{
+	int start_part;
+	int end_part;
+	int part_nr;
+	struct flex_array_part *part;
+
+	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		return 0;
+	start_part = fa_element_to_part_nr(fa, start);
+	end_part = fa_element_to_part_nr(fa, end);
+	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr.  Note
+ * that this is a copy of the data that was passed in.  If you
+ * are using this to store pointers, you'll get back &ptr.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	int index;
+
+	if (element_nr >= fa->total_nr_elements)
+		return NULL;
+	if (!fa->parts[part_nr])
+		return NULL;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else
+		part = fa->parts[part_nr];
+	index = index_inside_part(fa, element_nr);
+	return &part->elements[index_inside_part(fa, element_nr)];
+}
-- 
cgit v1.2.3


From 812ed032cdc8138b7546eecc996879756b92d801 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 29 Jul 2009 15:04:19 -0700
Subject: uio: mark uio.h functions __KERNEL__ only

To avoid userspace build failures such as:

.../linux/uio.h:37: error: expected `=', `,', `;', `asm' or `__attribute__' before `iov_length'
.../linux/uio.h:47: error: expected declaration specifiers or `...' before `size_t'

move uio functions inside a __KERNEL__ block.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uio.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index b7fe13883bdb..98c114323a8b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -19,15 +19,6 @@ struct iovec
 	__kernel_size_t iov_len; /* Must be size_t (1003.1g) */
 };
 
-#ifdef __KERNEL__
-
-struct kvec {
-	void *iov_base; /* and that should *never* hold a userland pointer */
-	size_t iov_len;
-};
-
-#endif
-
 /*
  *	UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1)
  */
@@ -35,6 +26,13 @@ struct kvec {
 #define UIO_FASTIOV	8
 #define UIO_MAXIOV	1024
 
+#ifdef __KERNEL__
+
+struct kvec {
+	void *iov_base; /* and that should *never* hold a userland pointer */
+	size_t iov_len;
+};
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -53,5 +51,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 }
 
 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
+#endif
 
 #endif
-- 
cgit v1.2.3