summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c51
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c348
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c32
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h42
-rw-r--r--drivers/gpu/drm/radeon/ni.c21
-rw-r--r--drivers/gpu/drm/radeon/r100.c149
-rw-r--r--drivers/gpu/drm/radeon/r100_track.h110
-rw-r--r--drivers/gpu/drm/radeon/r300.c5
-rw-r--r--drivers/gpu/drm/radeon/r300_cmdbuf.c2
-rw-r--r--drivers/gpu/drm/radeon/r600.c37
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c24
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c359
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c41
-rw-r--r--drivers/gpu/drm/radeon/r600d.h22
-rw-r--r--drivers/gpu/drm/radeon/radeon.h109
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c16
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h14
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_benchmark.c247
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c28
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c22
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c48
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_tv.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c41
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h42
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c38
-rw-r--r--drivers/gpu/drm/radeon/radeon_state.c16
-rw-r--r--drivers/gpu/drm/radeon/rs400.c3
-rw-r--r--drivers/gpu/drm/radeon/rs600.c3
-rw-r--r--drivers/gpu/drm/radeon/rv770.c15
35 files changed, 869 insertions, 1035 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index c4ffa14fb2f4..ed406e8404a3 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -39,7 +39,7 @@
static void evergreen_gpu_init(struct radeon_device *rdev);
void evergreen_fini(struct radeon_device *rdev);
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
{
@@ -935,6 +935,9 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
WREG32(VM_CONTEXT1_CNTL, 0);
evergreen_pcie_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
@@ -2586,7 +2589,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
return 0;
}
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
{
u32 tmp;
@@ -2697,7 +2700,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
r600_rlc_stop(rdev);
}
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
{
u32 wptr, tmp;
@@ -3003,8 +3006,7 @@ static int evergreen_startup(struct radeon_device *rdev)
int r;
/* enable pcie gen2 link */
- if (!ASIC_IS_DCE5(rdev))
- evergreen_pcie_gen2_enable(rdev);
+ evergreen_pcie_gen2_enable(rdev);
if (ASIC_IS_DCE5(rdev)) {
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
@@ -3041,7 +3043,7 @@ static int evergreen_startup(struct radeon_device *rdev)
r = evergreen_blit_init(rdev);
if (r) {
- evergreen_blit_fini(rdev);
+ r600_blit_fini(rdev);
rdev->asic->copy = NULL;
dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
}
@@ -3107,45 +3109,14 @@ int evergreen_resume(struct radeon_device *rdev)
int evergreen_suspend(struct radeon_device *rdev)
{
- int r;
-
/* FIXME: we should wait for ring to be empty */
r700_cp_stop(rdev);
rdev->cp.ready = false;
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
evergreen_pcie_gart_disable(rdev);
+ r600_blit_suspend(rdev);
- /* unpin shaders bo */
- r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
- if (likely(r == 0)) {
- radeon_bo_unpin(rdev->r600_blit.shader_obj);
- radeon_bo_unreserve(rdev->r600_blit.shader_obj);
- }
-
- return 0;
-}
-
-int evergreen_copy_blit(struct radeon_device *rdev,
- uint64_t src_offset,
- uint64_t dst_offset,
- unsigned num_gpu_pages,
- struct radeon_fence *fence)
-{
- int r;
-
- mutex_lock(&rdev->r600_blit.mutex);
- rdev->r600_blit.vb_ib = NULL;
- r = evergreen_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
- if (r) {
- if (rdev->r600_blit.vb_ib)
- radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
- mutex_unlock(&rdev->r600_blit.mutex);
- return r;
- }
- evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
- evergreen_blit_done_copy(rdev, fence);
- mutex_unlock(&rdev->r600_blit.mutex);
return 0;
}
@@ -3257,7 +3228,7 @@ int evergreen_init(struct radeon_device *rdev)
void evergreen_fini(struct radeon_device *rdev)
{
- evergreen_blit_fini(rdev);
+ r600_blit_fini(rdev);
r700_cp_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
@@ -3273,7 +3244,7 @@ void evergreen_fini(struct radeon_device *rdev)
rdev->bios = NULL;
}
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
{
u32 link_width_cntl, speed_cntl;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb251858e72..dcf11bbc06d9 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format,
if (h < 8)
h = 8;
- cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
+ cb_color_info = CB_FORMAT(format) |
+ CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+ CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
pitch = (w / 8) - 1;
slice = ((w * h) / 64) - 1;
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format,
radeon_ring_write(rdev, slice);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, cb_color_info);
- radeon_ring_write(rdev, (1 << 4));
+ radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, 0);
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
/* high addr, stride */
- sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+ sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+ SQ_VTXC_STRIDE(16);
#ifdef __BIG_ENDIAN
- sq_vtx_constant_word2 |= (2 << 30);
+ sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
#endif
/* xyzw swizzles */
- sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
+ sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
+ SQ_VTCX_SEL_Y(SQ_SEL_Y) |
+ SQ_VTCX_SEL_Z(SQ_SEL_Z) |
+ SQ_VTCX_SEL_W(SQ_SEL_W);
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
radeon_ring_write(rdev, 0x580);
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, 0);
radeon_ring_write(rdev, 0);
- radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+ radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
if ((rdev->family == CHIP_CEDAR) ||
(rdev->family == CHIP_PALM) ||
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev,
if (h < 1)
h = 1;
- sq_tex_resource_word0 = (1 << 0); /* 2D */
+ sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
((w - 1) << 18));
- sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
+ sq_tex_resource_word1 = ((h - 1) << 0) |
+ TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
/* xyzw swizzles */
- sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+ sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
+ TEX_DST_SEL_Y(SQ_SEL_Y) |
+ TEX_DST_SEL_Z(SQ_SEL_Z) |
+ TEX_DST_SEL_W(SQ_SEL_W);
- sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
+ sq_tex_resource_word7 = format |
+ S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
radeon_ring_write(rdev, 0);
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev)
}
-static inline uint32_t i2f(uint32_t input)
-{
- u32 result, i, exponent, fraction;
-
- if ((input & 0x3fff) == 0)
- result = 0; /* 0 is a special case */
- else {
- exponent = 140; /* exponent biased by 127; */
- fraction = (input & 0x3fff) << 10; /* cheat and only
- handle numbers below 2^^15 */
- for (i = 0; i < 14; i++) {
- if (fraction & 0x800000)
- break;
- else {
- fraction = fraction << 1; /* keep
- shifting left until top bit = 1 */
- exponent = exponent - 1;
- }
- }
- result = exponent << 23 | (fraction & 0x7fffff); /* mask
- off top bit; assumed 1 */
- }
- return result;
-}
-
int evergreen_blit_init(struct radeon_device *rdev)
{
u32 obj_size;
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev)
u32 packet2s[16];
int num_packet2s = 0;
+ rdev->r600_blit.primitives.set_render_target = set_render_target;
+ rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+ rdev->r600_blit.primitives.set_shaders = set_shaders;
+ rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+ rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+ rdev->r600_blit.primitives.set_scissors = set_scissors;
+ rdev->r600_blit.primitives.draw_auto = draw_auto;
+ rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+ rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
+ rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+ rdev->r600_blit.ring_size_common += 5; /* done copy */
+ rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+ rdev->r600_blit.ring_size_per_loop = 74;
+
+ rdev->r600_blit.max_dim = 16384;
+
/* pin copy shader into vram if already initialized */
if (rdev->r600_blit.shader_obj)
goto done;
@@ -712,277 +716,3 @@ done:
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
-
-void evergreen_blit_fini(struct radeon_device *rdev)
-{
- int r;
-
- radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
- if (rdev->r600_blit.shader_obj == NULL)
- return;
- /* If we can't reserve the bo, unref should be enough to destroy
- * it when it becomes idle.
- */
- r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
- if (!r) {
- radeon_bo_unpin(rdev->r600_blit.shader_obj);
- radeon_bo_unreserve(rdev->r600_blit.shader_obj);
- }
- radeon_bo_unref(&rdev->r600_blit.shader_obj);
-}
-
-static int evergreen_vb_ib_get(struct radeon_device *rdev)
-{
- int r;
- r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
- if (r) {
- DRM_ERROR("failed to get IB for vertex buffer\n");
- return r;
- }
-
- rdev->r600_blit.vb_total = 64*1024;
- rdev->r600_blit.vb_used = 0;
- return 0;
-}
-
-static void evergreen_vb_ib_put(struct radeon_device *rdev)
-{
- radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
- radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-}
-
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
-{
- int r;
- int ring_size, line_size;
- int max_size;
- /* loops of emits + fence emit possible */
- int dwords_per_loop = 74, num_loops;
-
- r = evergreen_vb_ib_get(rdev);
- if (r)
- return r;
-
- /* 8 bpp vs 32 bpp for xfer unit */
- if (size_bytes & 3)
- line_size = 8192;
- else
- line_size = 8192 * 4;
-
- max_size = 8192 * line_size;
-
- /* major loops cover the max size transfer */
- num_loops = ((size_bytes + max_size) / max_size);
- /* minor loops cover the extra non aligned bits */
- num_loops += ((size_bytes % line_size) ? 1 : 0);
- /* calculate number of loops correctly */
- ring_size = num_loops * dwords_per_loop;
- /* set default + shaders */
- ring_size += 55; /* shaders + def state */
- ring_size += 10; /* fence emit for VB IB */
- ring_size += 5; /* done copy */
- ring_size += 10; /* fence emit for done copy */
- r = radeon_ring_lock(rdev, ring_size);
- if (r)
- return r;
-
- set_default_state(rdev); /* 36 */
- set_shaders(rdev); /* 16 */
- return 0;
-}
-
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
-{
- int r;
-
- if (rdev->r600_blit.vb_ib)
- evergreen_vb_ib_put(rdev);
-
- if (fence)
- r = radeon_fence_emit(rdev, fence);
-
- radeon_ring_unlock_commit(rdev);
-}
-
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
- u64 src_gpu_addr, u64 dst_gpu_addr,
- int size_bytes)
-{
- int max_bytes;
- u64 vb_gpu_addr;
- u32 *vb;
-
- DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
- size_bytes, rdev->r600_blit.vb_used);
- vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
- if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
- max_bytes = 8192;
-
- while (size_bytes) {
- int cur_size = size_bytes;
- int src_x = src_gpu_addr & 255;
- int dst_x = dst_gpu_addr & 255;
- int h = 1;
- src_gpu_addr = src_gpu_addr & ~255ULL;
- dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
- if (!src_x && !dst_x) {
- h = (cur_size / max_bytes);
- if (h > 8192)
- h = 8192;
- if (h == 0)
- h = 1;
- else
- cur_size = max_bytes;
- } else {
- if (cur_size > max_bytes)
- cur_size = max_bytes;
- if (cur_size > (max_bytes - dst_x))
- cur_size = (max_bytes - dst_x);
- if (cur_size > (max_bytes - src_x))
- cur_size = (max_bytes - src_x);
- }
-
- if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
- WARN_ON(1);
- }
-
- vb[0] = i2f(dst_x);
- vb[1] = 0;
- vb[2] = i2f(src_x);
- vb[3] = 0;
-
- vb[4] = i2f(dst_x);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x);
- vb[7] = i2f(h);
-
- vb[8] = i2f(dst_x + cur_size);
- vb[9] = i2f(h);
- vb[10] = i2f(src_x + cur_size);
- vb[11] = i2f(h);
-
- /* src 10 */
- set_tex_resource(rdev, FMT_8,
- src_x + cur_size, h, src_x + cur_size,
- src_gpu_addr);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-
- /* dst 17 */
- set_render_target(rdev, COLOR_8,
- dst_x + cur_size, h,
- dst_gpu_addr);
-
- /* scissors 12 */
- set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
-
- /* 15 */
- vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
- set_vtx_resource(rdev, vb_gpu_addr);
-
- /* draw 10 */
- draw_auto(rdev);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
- cur_size * h, dst_gpu_addr);
-
- vb += 12;
- rdev->r600_blit.vb_used += 12 * 4;
-
- src_gpu_addr += cur_size * h;
- dst_gpu_addr += cur_size * h;
- size_bytes -= cur_size * h;
- }
- } else {
- max_bytes = 8192 * 4;
-
- while (size_bytes) {
- int cur_size = size_bytes;
- int src_x = (src_gpu_addr & 255);
- int dst_x = (dst_gpu_addr & 255);
- int h = 1;
- src_gpu_addr = src_gpu_addr & ~255ULL;
- dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
- if (!src_x && !dst_x) {
- h = (cur_size / max_bytes);
- if (h > 8192)
- h = 8192;
- if (h == 0)
- h = 1;
- else
- cur_size = max_bytes;
- } else {
- if (cur_size > max_bytes)
- cur_size = max_bytes;
- if (cur_size > (max_bytes - dst_x))
- cur_size = (max_bytes - dst_x);
- if (cur_size > (max_bytes - src_x))
- cur_size = (max_bytes - src_x);
- }
-
- if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
- WARN_ON(1);
- }
-
- vb[0] = i2f(dst_x / 4);
- vb[1] = 0;
- vb[2] = i2f(src_x / 4);
- vb[3] = 0;
-
- vb[4] = i2f(dst_x / 4);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x / 4);
- vb[7] = i2f(h);
-
- vb[8] = i2f((dst_x + cur_size) / 4);
- vb[9] = i2f(h);
- vb[10] = i2f((src_x + cur_size) / 4);
- vb[11] = i2f(h);
-
- /* src 10 */
- set_tex_resource(rdev, FMT_8_8_8_8,
- (src_x + cur_size) / 4,
- h, (src_x + cur_size) / 4,
- src_gpu_addr);
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
- /* dst 17 */
- set_render_target(rdev, COLOR_8_8_8_8,
- (dst_x + cur_size) / 4, h,
- dst_gpu_addr);
-
- /* scissors 12 */
- set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
- /* Vertex buffer setup 15 */
- vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
- set_vtx_resource(rdev, vb_gpu_addr);
-
- /* draw 10 */
- draw_auto(rdev);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
- cur_size * h, dst_gpu_addr);
-
- /* 74 ring dwords per loop */
- vb += 12;
- rdev->r600_blit.vb_used += 12 * 4;
-
- src_gpu_addr += cur_size * h;
- dst_gpu_addr += cur_size * h;
- size_bytes -= cur_size * h;
- }
- }
-}
-
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index a134790903d3..7fdfa8ea7570 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
track->db_s_write_bo = NULL;
}
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
- /* XXX fill in */
- return 0;
-}
-
static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
}
/**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser: parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
- struct radeon_cs_packet p3reloc;
- int r;
-
- r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
- if (r) {
- return 0;
- }
- if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
- return 0;
- }
- return 1;
-}
-
-/**
* evergreen_cs_packet_next_vline() - parse userspace VLINE packet
* @parser: parser structure holding parsing context.
*
@@ -414,7 +386,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
* if register is safe. If register is not flag as safe this function
* will test it against a list of register needind special handling.
*/
-static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
struct radeon_cs_reloc *reloc;
@@ -990,7 +962,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
* This function will check that the resource has valid field and that
* the texture and mipmap bo object are big enough to cover this resource.
*/
-static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
+static int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
struct radeon_bo *texture,
struct radeon_bo *mipmap)
{
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 7363d9dec909..b937c49054d9 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -941,11 +941,15 @@
#define CB_COLOR0_SLICE 0x28c68
#define CB_COLOR0_VIEW 0x28c6c
#define CB_COLOR0_INFO 0x28c70
+# define CB_FORMAT(x) ((x) << 2)
# define CB_ARRAY_MODE(x) ((x) << 8)
# define ARRAY_LINEAR_GENERAL 0
# define ARRAY_LINEAR_ALIGNED 1
# define ARRAY_1D_TILED_THIN1 2
# define ARRAY_2D_TILED_THIN1 4
+# define CB_SOURCE_FORMAT(x) ((x) << 24)
+# define CB_SF_EXPORT_FULL 0
+# define CB_SF_EXPORT_NORM 1
#define CB_COLOR0_ATTRIB 0x28c74
#define CB_COLOR0_DIM 0x28c78
/* only CB0-7 blocks have these regs */
@@ -1107,15 +1111,53 @@
#define CB_COLOR7_CLEAR_WORD3 0x28e3c
#define SQ_TEX_RESOURCE_WORD0_0 0x30000
+# define TEX_DIM(x) ((x) << 0)
+# define SQ_TEX_DIM_1D 0
+# define SQ_TEX_DIM_2D 1
+# define SQ_TEX_DIM_3D 2
+# define SQ_TEX_DIM_CUBEMAP 3
+# define SQ_TEX_DIM_1D_ARRAY 4
+# define SQ_TEX_DIM_2D_ARRAY 5
+# define SQ_TEX_DIM_2D_MSAA 6
+# define SQ_TEX_DIM_2D_ARRAY_MSAA 7
#define SQ_TEX_RESOURCE_WORD1_0 0x30004
# define TEX_ARRAY_MODE(x) ((x) << 28)
#define SQ_TEX_RESOURCE_WORD2_0 0x30008
#define SQ_TEX_RESOURCE_WORD3_0 0x3000C
#define SQ_TEX_RESOURCE_WORD4_0 0x30010
+# define TEX_DST_SEL_X(x) ((x) << 16)
+# define TEX_DST_SEL_Y(x) ((x) << 19)
+# define TEX_DST_SEL_Z(x) ((x) << 22)
+# define TEX_DST_SEL_W(x) ((x) << 25)
+# define SQ_SEL_X 0
+# define SQ_SEL_Y 1
+# define SQ_SEL_Z 2
+# define SQ_SEL_W 3
+# define SQ_SEL_0 4
+# define SQ_SEL_1 5
#define SQ_TEX_RESOURCE_WORD5_0 0x30014
#define SQ_TEX_RESOURCE_WORD6_0 0x30018
#define SQ_TEX_RESOURCE_WORD7_0 0x3001c
+#define SQ_VTX_CONSTANT_WORD0_0 0x30000
+#define SQ_VTX_CONSTANT_WORD1_0 0x30004
+#define SQ_VTX_CONSTANT_WORD2_0 0x30008
+# define SQ_VTXC_BASE_ADDR_HI(x) ((x) << 0)
+# define SQ_VTXC_STRIDE(x) ((x) << 8)
+# define SQ_VTXC_ENDIAN_SWAP(x) ((x) << 30)
+# define SQ_ENDIAN_NONE 0
+# define SQ_ENDIAN_8IN16 1
+# define SQ_ENDIAN_8IN32 2
+#define SQ_VTX_CONSTANT_WORD3_0 0x3000C
+# define SQ_VTCX_SEL_X(x) ((x) << 3)
+# define SQ_VTCX_SEL_Y(x) ((x) << 6)
+# define SQ_VTCX_SEL_Z(x) ((x) << 9)
+# define SQ_VTCX_SEL_W(x) ((x) << 12)
+#define SQ_VTX_CONSTANT_WORD4_0 0x30010
+#define SQ_VTX_CONSTANT_WORD5_0 0x30014
+#define SQ_VTX_CONSTANT_WORD6_0 0x30018
+#define SQ_VTX_CONSTANT_WORD7_0 0x3001c
+
/* cayman 3D regs */
#define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B0
#define CAYMAN_DB_EQAA 0x28804
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8c79ca97753d..556b7bc3418b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -40,6 +40,7 @@ extern void evergreen_mc_program(struct radeon_device *rdev);
extern void evergreen_irq_suspend(struct radeon_device *rdev);
extern int evergreen_mc_init(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
+extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
#define EVERGREEN_PFP_UCODE_SIZE 1120
#define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -967,6 +968,9 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
WREG32(VM_CONTEXT1_CNTL, 0);
cayman_pcie_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
@@ -1341,6 +1345,9 @@ static int cayman_startup(struct radeon_device *rdev)
{
int r;
+ /* enable pcie gen2 link */
+ evergreen_pcie_gen2_enable(rdev);
+
if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
r = ni_init_microcode(rdev);
if (r) {
@@ -1362,7 +1369,7 @@ static int cayman_startup(struct radeon_device *rdev)
r = evergreen_blit_init(rdev);
if (r) {
- evergreen_blit_fini(rdev);
+ r600_blit_fini(rdev);
rdev->asic->copy = NULL;
dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
}
@@ -1423,21 +1430,13 @@ int cayman_resume(struct radeon_device *rdev)
int cayman_suspend(struct radeon_device *rdev)
{
- int r;
-
/* FIXME: we should wait for ring to be empty */
cayman_cp_enable(rdev, false);
rdev->cp.ready = false;
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev);
-
- /* unpin shaders bo */
- r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
- if (likely(r == 0)) {
- radeon_bo_unpin(rdev->r600_blit.shader_obj);
- radeon_bo_unreserve(rdev->r600_blit.shader_obj);
- }
+ r600_blit_suspend(rdev);
return 0;
}
@@ -1550,7 +1549,7 @@ int cayman_init(struct radeon_device *rdev)
void cayman_fini(struct radeon_device *rdev)
{
- evergreen_blit_fini(rdev);
+ r600_blit_fini(rdev);
cayman_cp_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7fcdbbbf2979..8f8b8fa14357 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
*/
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx,
+ unsigned reg)
+{
+ int r;
+ u32 tile_flags = 0;
+ u32 tmp;
+ struct radeon_cs_reloc *reloc;
+ u32 value;
+
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+ idx, reg);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ value = radeon_get_ib_value(p, idx);
+ tmp = value & 0x003fffff;
+ tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+ tile_flags |= RADEON_DST_TILE_MACRO;
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+ if (reg == RADEON_SRC_PITCH_OFFSET) {
+ DRM_ERROR("Cannot src blit from microtiled surface\n");
+ r100_cs_dump_packet(p, pkt);
+ return -EINVAL;
+ }
+ tile_flags |= RADEON_DST_TILE_MICRO;
+ }
+
+ tmp |= tile_flags;
+ p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+ return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ int idx)
+{
+ unsigned c, i;
+ struct radeon_cs_reloc *reloc;
+ struct r100_cs_track *track;
+ int r = 0;
+ volatile uint32_t *ib;
+ u32 idx_value;
+
+ ib = p->ib->ptr;
+ track = (struct r100_cs_track *)p->track;
+ c = radeon_get_ib_value(p, idx++) & 0x1F;
+ if (c > 16) {
+ DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return -EINVAL;
+ }
+ track->num_arrays = c;
+ for (i = 0; i < (c - 1); i+=2, idx+=3) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize &= 0x7F;
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 1].robj = reloc->robj;
+ track->arrays[i + 1].esize = idx_value >> 24;
+ track->arrays[i + 1].esize &= 0x7F;
+ }
+ if (c & 1) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].esize &= 0x7F;
+ }
+ return r;
+}
+
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
/* enable the pflip int */
@@ -513,6 +615,9 @@ int r100_pci_gart_enable(struct radeon_device *rdev)
tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
WREG32(RADEON_AIC_CNTL, tmp);
r100_pci_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
@@ -588,7 +693,7 @@ void r100_irq_disable(struct radeon_device *rdev)
WREG32(R_000044_GEN_INT_STATUS, tmp);
}
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -3147,7 +3252,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
}
}
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
DRM_ERROR("pitch %d\n", t->pitch);
DRM_ERROR("use_pitch %d\n", t->use_pitch);
@@ -3965,3 +4070,43 @@ int r100_init(struct radeon_device *rdev)
}
return 0;
}
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+ if (reg < rdev->rmmio_size)
+ return readl(((void __iomem *)rdev->rmmio) + reg);
+ else {
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ }
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+ if (reg < rdev->rmmio_size)
+ writel(v, ((void __iomem *)rdev->rmmio) + reg);
+ else {
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ }
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+ if (reg < rdev->rio_mem_size)
+ return ioread32(rdev->rio_mem + reg);
+ else {
+ iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+ return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+ }
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ if (reg < rdev->rio_mem_size)
+ iowrite32(v, rdev->rio_mem + reg);
+ else {
+ iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+ iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+ }
+}
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc5d4bd..6a603b378adb 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg);
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
- struct radeon_cs_packet *pkt,
- unsigned idx,
- unsigned reg)
-{
- int r;
- u32 tile_flags = 0;
- u32 tmp;
- struct radeon_cs_reloc *reloc;
- u32 value;
-
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
- idx, reg);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- value = radeon_get_ib_value(p, idx);
- tmp = value & 0x003fffff;
- tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
- if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
- tile_flags |= RADEON_DST_TILE_MACRO;
- if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
- if (reg == RADEON_SRC_PITCH_OFFSET) {
- DRM_ERROR("Cannot src blit from microtiled surface\n");
- r100_cs_dump_packet(p, pkt);
- return -EINVAL;
- }
- tile_flags |= RADEON_DST_TILE_MICRO;
- }
-
- tmp |= tile_flags;
- p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
- return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
- struct radeon_cs_packet *pkt,
- int idx)
-{
- unsigned c, i;
- struct radeon_cs_reloc *reloc;
- struct r100_cs_track *track;
- int r = 0;
- volatile uint32_t *ib;
- u32 idx_value;
-
- ib = p->ib->ptr;
- track = (struct r100_cs_track *)p->track;
- c = radeon_get_ib_value(p, idx++) & 0x1F;
- if (c > 16) {
- DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return -EINVAL;
- }
- track->num_arrays = c;
- for (i = 0; i < (c - 1); i+=2, idx+=3) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- idx_value = radeon_get_ib_value(p, idx);
- ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
- track->arrays[i + 0].esize = idx_value >> 8;
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize &= 0x7F;
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 1].robj = reloc->robj;
- track->arrays[i + 1].esize = idx_value >> 24;
- track->arrays[i + 1].esize &= 0x7F;
- }
- if (c & 1) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- idx_value = radeon_get_ib_value(p, idx);
- ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize = idx_value >> 8;
- track->arrays[i + 0].esize &= 0x7F;
- }
- return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx,
+ unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ int idx);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55a7f190027e..33f2b68c680b 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -144,8 +144,9 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
rv370_pcie_gart_tlb_flush(rdev);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
- (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)table_addr);
rdev->gart.ready = true;
return 0;
}
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742e4140..1fe98b421c9b 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
/**
* Emit the sequence to pacify R300.
*/
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
{
uint32_t cache_z, cache_3d, cache_2d;
RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 720dd99163f8..12470b090ddf 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -993,6 +993,9 @@ int r600_pcie_gart_enable(struct radeon_device *rdev)
WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
r600_pcie_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
@@ -2362,19 +2365,33 @@ int r600_copy_blit(struct radeon_device *rdev,
mutex_lock(&rdev->r600_blit.mutex);
rdev->r600_blit.vb_ib = NULL;
- r = r600_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
+ r = r600_blit_prepare_copy(rdev, num_gpu_pages);
if (r) {
if (rdev->r600_blit.vb_ib)
radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
mutex_unlock(&rdev->r600_blit.mutex);
return r;
}
- r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
+ r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages);
r600_blit_done_copy(rdev, fence);
mutex_unlock(&rdev->r600_blit.mutex);
return 0;
}
+void r600_blit_suspend(struct radeon_device *rdev)
+{
+ int r;
+
+ /* unpin shaders bo */
+ if (rdev->r600_blit.shader_obj) {
+ r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+ if (!r) {
+ radeon_bo_unpin(rdev->r600_blit.shader_obj);
+ radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+ }
+ }
+}
+
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size)
@@ -2494,8 +2511,6 @@ int r600_resume(struct radeon_device *rdev)
int r600_suspend(struct radeon_device *rdev)
{
- int r;
-
r600_audio_fini(rdev);
/* FIXME: we should wait for ring to be empty */
r600_cp_stop(rdev);
@@ -2503,14 +2518,8 @@ int r600_suspend(struct radeon_device *rdev)
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
r600_pcie_gart_disable(rdev);
- /* unpin shaders bo */
- if (rdev->r600_blit.shader_obj) {
- r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
- if (!r) {
- radeon_bo_unpin(rdev->r600_blit.shader_obj);
- radeon_bo_unreserve(rdev->r600_blit.shader_obj);
- }
- }
+ r600_blit_suspend(rdev);
+
return 0;
}
@@ -3137,7 +3146,7 @@ int r600_irq_set(struct radeon_device *rdev)
return 0;
}
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
{
u32 tmp;
@@ -3238,7 +3247,7 @@ void r600_irq_disable(struct radeon_device *rdev)
r600_disable_interrupt_state(rdev);
}
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
{
u32 wptr, tmp;
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f1043448d25..3c031a48205d 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
#define COLOR_5_6_5 0x8
#define COLOR_8_8_8_8 0x1a
-static inline void
+static void
set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
{
u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
ADVANCE_RING();
}
-static inline void
+static void
cp_set_surface_sync(drm_radeon_private_t *dev_priv,
u32 sync_type, u32 size, u64 mc_addr)
{
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
ADVANCE_RING();
}
-static inline void
+static void
set_shaders(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
R600_SH_ACTION_ENA, 512, gpu_addr);
}
-static inline void
+static void
set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
{
uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
R600_VC_ACTION_ENA, 48, gpu_addr);
}
-static inline void
+static void
set_tex_resource(drm_radeon_private_t *dev_priv,
int format, int w, int h, int pitch, u64 gpu_addr)
{
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
}
-static inline void
+static void
set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
{
RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
ADVANCE_RING();
}
-static inline void
+static void
draw_auto(drm_radeon_private_t *dev_priv)
{
RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
COMMIT_RING();
}
-static inline void
+static void
set_default_state(drm_radeon_private_t *dev_priv)
{
int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
ADVANCE_RING();
}
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
{
u32 result, i, exponent, fraction;
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
}
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
return 0;
}
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
}
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
return (((char *)dev->agp_buffer_map->handle +
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3f8cb6..c4cf1308d4a1 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -42,6 +42,9 @@
#define COLOR_5_6_5 0x8
#define COLOR_8_8_8_8 0x1a
+#define RECT_UNIT_H 32
+#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
+
/* emits 21 on rv770+, 23 on r600 */
static void
set_render_target(struct radeon_device *rdev, int format,
@@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format,
if (h < 8)
h = 8;
- cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
+ cb_color_info = CB_FORMAT(format) |
+ CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+ CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
pitch = (w / 8) - 1;
slice = ((w * h) / 64) - 1;
@@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
u32 sq_vtx_constant_word2;
- sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+ sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+ SQ_VTXC_STRIDE(16);
#ifdef __BIG_ENDIAN
- sq_vtx_constant_word2 |= (2 << 30);
+ sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
#endif
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
@@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev,
if (h < 1)
h = 1;
- sq_tex_resource_word0 = (1 << 0) | (1 << 3);
- sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
- ((w - 1) << 19));
+ sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
+ S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+ sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
+ S_038000_TEX_WIDTH(w - 1);
- sq_tex_resource_word1 = (format << 26);
- sq_tex_resource_word1 |= ((h - 1) << 0);
+ sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
+ sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);
- sq_tex_resource_word4 = ((1 << 14) |
- (0 << 16) |
- (1 << 19) |
- (2 << 22) |
- (3 << 25));
+ sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
+ S_038010_DST_SEL_X(SQ_SEL_X) |
+ S_038010_DST_SEL_Y(SQ_SEL_Y) |
+ S_038010_DST_SEL_Z(SQ_SEL_Z) |
+ S_038010_DST_SEL_W(SQ_SEL_W);
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
radeon_ring_write(rdev, 0);
@@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev)
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
}
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
{
u32 result, i, exponent, fraction;
@@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev)
u32 packet2s[16];
int num_packet2s = 0;
+ rdev->r600_blit.primitives.set_render_target = set_render_target;
+ rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+ rdev->r600_blit.primitives.set_shaders = set_shaders;
+ rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+ rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+ rdev->r600_blit.primitives.set_scissors = set_scissors;
+ rdev->r600_blit.primitives.draw_auto = draw_auto;
+ rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+ rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
+ rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+ rdev->r600_blit.ring_size_common += 5; /* done copy */
+ rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+ rdev->r600_blit.ring_size_per_loop = 76;
+ /* set_render_target emits 2 extra dwords on rv6xx */
+ if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
+ rdev->r600_blit.ring_size_per_loop += 2;
+
+ rdev->r600_blit.max_dim = 8192;
+
/* pin copy shader into vram if already initialized */
if (rdev->r600_blit.shader_obj)
goto done;
@@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev)
radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
}
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
+ int *width, int *height, int max_dim)
+{
+ unsigned max_pages;
+ unsigned pages = num_gpu_pages;
+ int w, h;
+
+ if (num_gpu_pages == 0) {
+ /* not supposed to be called with no pages, but just in case */
+ h = 0;
+ w = 0;
+ pages = 0;
+ WARN_ON(1);
+ } else {
+ int rect_order = 2;
+ h = RECT_UNIT_H;
+ while (num_gpu_pages / rect_order) {
+ h *= 2;
+ rect_order *= 4;
+ if (h >= max_dim) {
+ h = max_dim;
+ break;
+ }
+ }
+ max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
+ if (pages > max_pages)
+ pages = max_pages;
+ w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
+ w = (w / RECT_UNIT_W) * RECT_UNIT_W;
+ pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
+ BUG_ON(pages == 0);
+ }
+
+
+ DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
+
+ /* return width and height only of the caller wants it */
+ if (height)
+ *height = h;
+ if (width)
+ *width = w;
+
+ return pages;
+}
+
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
{
int r;
- int ring_size, line_size;
- int max_size;
- /* loops of emits 64 + fence emit possible */
- int dwords_per_loop = 76, num_loops;
+ int ring_size;
+ int num_loops = 0;
+ int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;
r = r600_vb_ib_get(rdev);
if (r)
return r;
- /* set_render_target emits 2 extra dwords on rv6xx */
- if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
- dwords_per_loop += 2;
-
- /* 8 bpp vs 32 bpp for xfer unit */
- if (size_bytes & 3)
- line_size = 8192;
- else
- line_size = 8192*4;
-
- max_size = 8192 * line_size;
+ /* num loops */
+ while (num_gpu_pages) {
+ num_gpu_pages -=
+ r600_blit_create_rect(num_gpu_pages, NULL, NULL,
+ rdev->r600_blit.max_dim);
+ num_loops++;
+ }
- /* major loops cover the max size transfer */
- num_loops = ((size_bytes + max_size) / max_size);
- /* minor loops cover the extra non aligned bits */
- num_loops += ((size_bytes % line_size) ? 1 : 0);
/* calculate number of loops correctly */
ring_size = num_loops * dwords_per_loop;
- /* set default + shaders */
- ring_size += 40; /* shaders + def state */
- ring_size += 10; /* fence emit for VB IB */
- ring_size += 5; /* done copy */
- ring_size += 10; /* fence emit for done copy */
+ ring_size += rdev->r600_blit.ring_size_common;
r = radeon_ring_lock(rdev, ring_size);
if (r)
return r;
- set_default_state(rdev); /* 14 */
- set_shaders(rdev); /* 26 */
+ rdev->r600_blit.primitives.set_default_state(rdev);
+ rdev->r600_blit.primitives.set_shaders(rdev);
return 0;
}
@@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
void r600_kms_blit_copy(struct radeon_device *rdev,
u64 src_gpu_addr, u64 dst_gpu_addr,
- int size_bytes)
+ unsigned num_gpu_pages)
{
- int max_bytes;
u64 vb_gpu_addr;
u32 *vb;
- DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
- size_bytes, rdev->r600_blit.vb_used);
+ DRM_DEBUG("emitting copy %16llx %16llx %d %d\n",
+ src_gpu_addr, dst_gpu_addr,
+ num_gpu_pages, rdev->r600_blit.vb_used);
vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
- if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
- max_bytes = 8192;
-
- while (size_bytes) {
- int cur_size = size_bytes;
- int src_x = src_gpu_addr & 255;
- int dst_x = dst_gpu_addr & 255;
- int h = 1;
- src_gpu_addr = src_gpu_addr & ~255ULL;
- dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
- if (!src_x && !dst_x) {
- h = (cur_size / max_bytes);
- if (h > 8192)
- h = 8192;
- if (h == 0)
- h = 1;
- else
- cur_size = max_bytes;
- } else {
- if (cur_size > max_bytes)
- cur_size = max_bytes;
- if (cur_size > (max_bytes - dst_x))
- cur_size = (max_bytes - dst_x);
- if (cur_size > (max_bytes - src_x))
- cur_size = (max_bytes - src_x);
- }
-
- if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
- WARN_ON(1);
- }
-
- vb[0] = i2f(dst_x);
- vb[1] = 0;
- vb[2] = i2f(src_x);
- vb[3] = 0;
-
- vb[4] = i2f(dst_x);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x);
- vb[7] = i2f(h);
-
- vb[8] = i2f(dst_x + cur_size);
- vb[9] = i2f(h);
- vb[10] = i2f(src_x + cur_size);
- vb[11] = i2f(h);
-
- /* src 9 */
- set_tex_resource(rdev, FMT_8,
- src_x + cur_size, h, src_x + cur_size,
- src_gpu_addr);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
- /* dst 23 */
- set_render_target(rdev, COLOR_8,
- dst_x + cur_size, h,
- dst_gpu_addr);
+ while (num_gpu_pages) {
+ int w, h;
+ unsigned size_in_bytes;
+ unsigned pages_per_loop =
+ r600_blit_create_rect(num_gpu_pages, &w, &h,
+ rdev->r600_blit.max_dim);
- /* scissors 12 */
- set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+ size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
+ DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
- /* 14 */
- vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
- set_vtx_resource(rdev, vb_gpu_addr);
-
- /* draw 10 */
- draw_auto(rdev);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
- cur_size * h, dst_gpu_addr);
-
- vb += 12;
- rdev->r600_blit.vb_used += 12 * 4;
-
- src_gpu_addr += cur_size * h;
- dst_gpu_addr += cur_size * h;
- size_bytes -= cur_size * h;
+ if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+ WARN_ON(1);
}
- } else {
- max_bytes = 8192 * 4;
-
- while (size_bytes) {
- int cur_size = size_bytes;
- int src_x = (src_gpu_addr & 255);
- int dst_x = (dst_gpu_addr & 255);
- int h = 1;
- src_gpu_addr = src_gpu_addr & ~255ULL;
- dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
- if (!src_x && !dst_x) {
- h = (cur_size / max_bytes);
- if (h > 8192)
- h = 8192;
- if (h == 0)
- h = 1;
- else
- cur_size = max_bytes;
- } else {
- if (cur_size > max_bytes)
- cur_size = max_bytes;
- if (cur_size > (max_bytes - dst_x))
- cur_size = (max_bytes - dst_x);
- if (cur_size > (max_bytes - src_x))
- cur_size = (max_bytes - src_x);
- }
-
- if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
- WARN_ON(1);
- }
- vb[0] = i2f(dst_x / 4);
- vb[1] = 0;
- vb[2] = i2f(src_x / 4);
- vb[3] = 0;
-
- vb[4] = i2f(dst_x / 4);
- vb[5] = i2f(h);
- vb[6] = i2f(src_x / 4);
- vb[7] = i2f(h);
-
- vb[8] = i2f((dst_x + cur_size) / 4);
- vb[9] = i2f(h);
- vb[10] = i2f((src_x + cur_size) / 4);
- vb[11] = i2f(h);
-
- /* src 9 */
- set_tex_resource(rdev, FMT_8_8_8_8,
- (src_x + cur_size) / 4,
- h, (src_x + cur_size) / 4,
- src_gpu_addr);
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
- /* dst 23 */
- set_render_target(rdev, COLOR_8_8_8_8,
- (dst_x + cur_size) / 4, h,
- dst_gpu_addr);
-
- /* scissors 12 */
- set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
- /* Vertex buffer setup 14 */
- vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
- set_vtx_resource(rdev, vb_gpu_addr);
-
- /* draw 10 */
- draw_auto(rdev);
-
- /* 5 */
- cp_set_surface_sync(rdev,
- PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
- cur_size * h, dst_gpu_addr);
-
- /* 78 ring dwords per loop */
- vb += 12;
- rdev->r600_blit.vb_used += 12 * 4;
-
- src_gpu_addr += cur_size * h;
- dst_gpu_addr += cur_size * h;
- size_bytes -= cur_size * h;
- }
+ vb[0] = 0;
+ vb[1] = 0;
+ vb[2] = 0;
+ vb[3] = 0;
+
+ vb[4] = 0;
+ vb[5] = i2f(h);
+ vb[6] = 0;
+ vb[7] = i2f(h);
+
+ vb[8] = i2f(w);
+ vb[9] = i2f(h);
+ vb[10] = i2f(w);
+ vb[11] = i2f(h);
+
+ rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
+ w, h, w, src_gpu_addr);
+ rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+ PACKET3_TC_ACTION_ENA,
+ size_in_bytes, src_gpu_addr);
+ rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
+ w, h, dst_gpu_addr);
+ rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
+ vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+ rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
+ rdev->r600_blit.primitives.draw_auto(rdev);
+ rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+ PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+ size_in_bytes, dst_gpu_addr);
+
+ vb += 12;
+ rdev->r600_blit.vb_used += 4*12;
+ src_gpu_addr += size_in_bytes;
+ dst_gpu_addr += size_in_bytes;
+ num_gpu_pages -= pages_per_loop;
}
}
-
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cf83aa05a684..0a2e023c1557 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = {
[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
};
-static inline bool fmt_is_valid_color(u32 format)
+static bool fmt_is_valid_color(u32 format)
{
if (format >= ARRAY_SIZE(color_formats_table))
return false;
@@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format)
return false;
}
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
{
if (format >= ARRAY_SIZE(color_formats_table))
return false;
@@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
return false;
}
-static inline int fmt_get_blocksize(u32 format)
+static int fmt_get_blocksize(u32 format)
{
if (format >= ARRAY_SIZE(color_formats_table))
return 0;
@@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format)
return color_formats_table[format].blocksize;
}
-static inline int fmt_get_nblocksx(u32 format, u32 w)
+static int fmt_get_nblocksx(u32 format, u32 w)
{
unsigned bw;
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w)
return (w + bw - 1) / bw;
}
-static inline int fmt_get_nblocksy(u32 format, u32 h)
+static int fmt_get_nblocksy(u32 format, u32 h)
{
unsigned bh;
@@ -223,25 +223,6 @@ static inline int fmt_get_nblocksy(u32 format, u32 h)
return (h + bh - 1) / bh;
}
-static inline int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- unsigned res;
-
- if (format >= ARRAY_SIZE(color_formats_table))
- goto fail;
-
- res = color_formats_table[format].blocksize;
- if (res == 0)
- goto fail;
-
- *bpe = res;
- return 0;
-
-fail:
- *bpe = 16;
- return -EINVAL;
-}
-
struct array_mode_checker {
int array_mode;
u32 group_size;
@@ -252,7 +233,7 @@ struct array_mode_checker {
};
/* returns alignment in pixels for pitch/height/depth and bytes for base */
-static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+static int r600_get_array_mode_alignment(struct array_mode_checker *values,
u32 *pitch_align,
u32 *height_align,
u32 *depth_align,
@@ -331,7 +312,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
track->db_depth_control = 0xFFFFFFFF;
}
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
{
struct r600_cs_track *track = p->track;
u32 slice_tile_max, size, tmp;
@@ -737,7 +718,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
* Check next packet is relocation packet3, do bo validation and compute
* GPU offset using the provided start.
**/
-static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
struct radeon_cs_packet p3reloc;
int r;
@@ -911,7 +892,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
* if register is safe. If register is not flag as safe this function
* will test it against a list of register needind special handling.
*/
-static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
struct r600_cs_track *track = (struct r600_cs_track *)p->track;
struct radeon_cs_reloc *reloc;
@@ -1215,7 +1196,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
return 0;
}
-static inline unsigned mip_minify(unsigned size, unsigned level)
+static unsigned mip_minify(unsigned size, unsigned level)
{
unsigned val;
@@ -1285,7 +1266,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
* This function will check that the resource has valid field and that
* the texture and mipmap bo object are big enough to cover this resource.
*/
-static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
+static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
struct radeon_bo *texture,
struct radeon_bo *mipmap,
u64 base_offset,
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 0245ae6c204e..bfe1b5d92afe 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -79,6 +79,11 @@
#define CB_COLOR0_SIZE 0x28060
#define CB_COLOR0_VIEW 0x28080
#define CB_COLOR0_INFO 0x280a0
+# define CB_FORMAT(x) ((x) << 2)
+# define CB_ARRAY_MODE(x) ((x) << 8)
+# define CB_SOURCE_FORMAT(x) ((x) << 27)
+# define CB_SF_EXPORT_FULL 0
+# define CB_SF_EXPORT_NORM 1
#define CB_COLOR0_TILE 0x280c0
#define CB_COLOR0_FRAG 0x280e0
#define CB_COLOR0_MASK 0x28100
@@ -417,6 +422,17 @@
#define SQ_PGM_START_VS 0x28858
#define SQ_PGM_RESOURCES_VS 0x28868
#define SQ_PGM_CF_OFFSET_VS 0x288d0
+
+#define SQ_VTX_CONSTANT_WORD0_0 0x30000
+#define SQ_VTX_CONSTANT_WORD1_0 0x30004
+#define SQ_VTX_CONSTANT_WORD2_0 0x30008
+# define SQ_VTXC_BASE_ADDR_HI(x) ((x) << 0)
+# define SQ_VTXC_STRIDE(x) ((x) << 8)
+# define SQ_VTXC_ENDIAN_SWAP(x) ((x) << 30)
+# define SQ_ENDIAN_NONE 0
+# define SQ_ENDIAN_8IN16 1
+# define SQ_ENDIAN_8IN32 2
+#define SQ_VTX_CONSTANT_WORD3_0 0x3000c
#define SQ_VTX_CONSTANT_WORD6_0 0x38018
#define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30)
#define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3)
@@ -1352,6 +1368,12 @@
#define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25)
#define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7)
#define C_038010_DST_SEL_W 0xF1FFFFFF
+# define SQ_SEL_X 0
+# define SQ_SEL_Y 1
+# define SQ_SEL_Z 2
+# define SQ_SEL_W 3
+# define SQ_SEL_0 4
+# define SQ_SEL_1 5
#define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28)
#define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF)
#define C_038010_BASE_LEVEL 0x0FFFFFFF
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1e056b35b29..e3170c794c1d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,7 +102,7 @@ extern int radeon_pcie_gen2;
#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2)
/* RADEON_IB_POOL_SIZE must be a power of 2 */
#define RADEON_IB_POOL_SIZE 16
-#define RADEON_DEBUGFS_MAX_NUM_FILES 32
+#define RADEON_DEBUGFS_MAX_COMPONENTS 32
#define RADEONFB_CONN_LIMIT 4
#define RADEON_BIOS_NUM_SCRATCH 8
@@ -523,9 +523,30 @@ struct r600_ih {
bool enabled;
};
+struct r600_blit_cp_primitives {
+ void (*set_render_target)(struct radeon_device *rdev, int format,
+ int w, int h, u64 gpu_addr);
+ void (*cp_set_surface_sync)(struct radeon_device *rdev,
+ u32 sync_type, u32 size,
+ u64 mc_addr);
+ void (*set_shaders)(struct radeon_device *rdev);
+ void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
+ void (*set_tex_resource)(struct radeon_device *rdev,
+ int format, int w, int h, int pitch,
+ u64 gpu_addr);
+ void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
+ int x2, int y2);
+ void (*draw_auto)(struct radeon_device *rdev);
+ void (*set_default_state)(struct radeon_device *rdev);
+};
+
struct r600_blit {
struct mutex mutex;
struct radeon_bo *shader_obj;
+ struct r600_blit_cp_primitives primitives;
+ int max_dim;
+ int ring_size_common;
+ int ring_size_per_loop;
u64 shader_gpu_addr;
u32 vs_offset, ps_offset;
u32 state_offset;
@@ -534,6 +555,8 @@ struct r600_blit {
struct radeon_ib *vb_ib;
};
+void r600_blit_suspend(struct radeon_device *rdev);
+
int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -601,32 +624,7 @@ struct radeon_cs_parser {
extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
- struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
- u32 pg_idx, pg_offset;
- u32 idx_value = 0;
- int new_page;
-
- pg_idx = (idx * 4) / PAGE_SIZE;
- pg_offset = (idx * 4) % PAGE_SIZE;
-
- if (ibc->kpage_idx[0] == pg_idx)
- return ibc->kpage[0][pg_offset/4];
- if (ibc->kpage_idx[1] == pg_idx)
- return ibc->kpage[1][pg_offset/4];
-
- new_page = radeon_cs_update_pages(p, pg_idx);
- if (new_page < 0) {
- p->parser_error = new_page;
- return 0;
- }
-
- idx_value = ibc->kpage[new_page][pg_offset/4];
- return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
struct radeon_cs_packet {
unsigned idx;
@@ -869,7 +867,7 @@ struct radeon_pm {
/*
* Benchmarking
*/
-void radeon_benchmark(struct radeon_device *rdev);
+void radeon_benchmark(struct radeon_device *rdev, int test_number);
/*
@@ -1252,45 +1250,10 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
- if (reg < rdev->rmmio_size)
- return readl((rdev->rmmio) + reg);
- else {
- writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
- return readl((rdev->rmmio) + RADEON_MM_DATA);
- }
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
- if (reg < rdev->rmmio_size)
- writel(v, (rdev->rmmio) + reg);
- else {
- writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
- writel(v, (rdev->rmmio) + RADEON_MM_DATA);
- }
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
- if (reg < rdev->rio_mem_size)
- return ioread32(rdev->rio_mem + reg);
- else {
- iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
- return ioread32(rdev->rio_mem + RADEON_MM_DATA);
- }
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- if (reg < rdev->rio_mem_size)
- iowrite32(v, rdev->rio_mem + reg);
- else {
- iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
- iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
- }
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
/*
* Cast helper
@@ -1413,19 +1376,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
/*
* RING helpers.
*/
+
+#if DRM_DEBUG_CODE == 0
static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
{
-#if DRM_DEBUG_CODE
- if (rdev->cp.count_dw <= 0) {
- DRM_ERROR("radeon: writting more dword to ring than expected !\n");
- }
-#endif
rdev->cp.ring[rdev->cp.wptr++] = v;
rdev->cp.wptr &= rdev->cp.ptr_mask;
rdev->cp.count_dw--;
rdev->cp.ring_free_dw--;
}
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
/*
* ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index df8218bb83a6..e2944566ffea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -765,9 +765,9 @@ static struct radeon_asic evergreen_asic = {
.get_vblank_counter = &evergreen_get_vblank_counter,
.fence_ring_emit = &r600_fence_ring_emit,
.cs_parse = &evergreen_cs_parse,
- .copy_blit = &evergreen_copy_blit,
+ .copy_blit = &r600_copy_blit,
.copy_dma = NULL,
- .copy = &evergreen_copy_blit,
+ .copy = &r600_copy_blit,
.get_engine_clock = &radeon_atom_get_engine_clock,
.set_engine_clock = &radeon_atom_set_engine_clock,
.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -812,9 +812,9 @@ static struct radeon_asic sumo_asic = {
.get_vblank_counter = &evergreen_get_vblank_counter,
.fence_ring_emit = &r600_fence_ring_emit,
.cs_parse = &evergreen_cs_parse,
- .copy_blit = &evergreen_copy_blit,
+ .copy_blit = &r600_copy_blit,
.copy_dma = NULL,
- .copy = &evergreen_copy_blit,
+ .copy = &r600_copy_blit,
.get_engine_clock = &radeon_atom_get_engine_clock,
.set_engine_clock = &radeon_atom_set_engine_clock,
.get_memory_clock = NULL,
@@ -859,9 +859,9 @@ static struct radeon_asic btc_asic = {
.get_vblank_counter = &evergreen_get_vblank_counter,
.fence_ring_emit = &r600_fence_ring_emit,
.cs_parse = &evergreen_cs_parse,
- .copy_blit = &evergreen_copy_blit,
+ .copy_blit = &r600_copy_blit,
.copy_dma = NULL,
- .copy = &evergreen_copy_blit,
+ .copy = &r600_copy_blit,
.get_engine_clock = &radeon_atom_get_engine_clock,
.set_engine_clock = &radeon_atom_set_engine_clock,
.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
.get_vblank_counter = &evergreen_get_vblank_counter,
.fence_ring_emit = &r600_fence_ring_emit,
.cs_parse = &evergreen_cs_parse,
- .copy_blit = &evergreen_copy_blit,
+ .copy_blit = &r600_copy_blit,
.copy_dma = NULL,
- .copy = &evergreen_copy_blit,
+ .copy = &r600_copy_blit,
.get_engine_clock = &radeon_atom_get_engine_clock,
.set_engine_clock = &radeon_atom_set_engine_clock,
.get_memory_clock = &radeon_atom_get_memory_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3dedaa07aac1..85f14f0337e4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder);
int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
/* r600 blit */
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages);
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
void r600_kms_blit_copy(struct radeon_device *rdev,
u64 src_gpu_addr, u64 dst_gpu_addr,
- int size_bytes);
+ unsigned num_gpu_pages);
/*
* rv770,rv730,rv710,rv740
@@ -401,9 +401,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
int evergreen_asic_reset(struct radeon_device *rdev);
void evergreen_bandwidth_update(struct radeon_device *rdev);
void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int evergreen_copy_blit(struct radeon_device *rdev,
- uint64_t src_offset, uint64_t dst_offset,
- unsigned num_gpu_pages, struct radeon_fence *fence);
void evergreen_hpd_init(struct radeon_device *rdev);
void evergreen_hpd_fini(struct radeon_device *rdev);
bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -421,13 +418,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba
extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
void evergreen_disable_interrupt_state(struct radeon_device *rdev);
int evergreen_blit_init(struct radeon_device *rdev);
-void evergreen_blit_fini(struct radeon_device *rdev);
-/* evergreen blit */
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
- u64 src_gpu_addr, u64 dst_gpu_addr,
- int size_bytes);
/*
* cayman
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b61584cdb..08d0b94332e6 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
};
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
uint8_t id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
}
}
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
u8 id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 10191d9372d8..5cafc90de7f8 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -26,21 +26,81 @@
#include "radeon_reg.h"
#include "radeon.h"
-void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
- unsigned sdomain, unsigned ddomain)
+#define RADEON_BENCHMARK_COPY_BLIT 1
+#define RADEON_BENCHMARK_COPY_DMA 0
+
+#define RADEON_BENCHMARK_ITERATIONS 1024
+#define RADEON_BENCHMARK_COMMON_MODES_N 17
+
+static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
+ uint64_t saddr, uint64_t daddr,
+ int flag, int n)
+{
+ unsigned long start_jiffies;
+ unsigned long end_jiffies;
+ struct radeon_fence *fence = NULL;
+ int i, r;
+
+ start_jiffies = jiffies;
+ for (i = 0; i < n; i++) {
+ r = radeon_fence_create(rdev, &fence);
+ if (r)
+ return r;
+
+ switch (flag) {
+ case RADEON_BENCHMARK_COPY_DMA:
+ r = radeon_copy_dma(rdev, saddr, daddr,
+ size / RADEON_GPU_PAGE_SIZE,
+ fence);
+ break;
+ case RADEON_BENCHMARK_COPY_BLIT:
+ r = radeon_copy_blit(rdev, saddr, daddr,
+ size / RADEON_GPU_PAGE_SIZE,
+ fence);
+ break;
+ default:
+ DRM_ERROR("Unknown copy method\n");
+ r = -EINVAL;
+ }
+ if (r)
+ goto exit_do_move;
+ r = radeon_fence_wait(fence, false);
+ if (r)
+ goto exit_do_move;
+ radeon_fence_unref(&fence);
+ }
+ end_jiffies = jiffies;
+ r = jiffies_to_msecs(end_jiffies - start_jiffies);
+
+exit_do_move:
+ if (fence)
+ radeon_fence_unref(&fence);
+ return r;
+}
+
+
+static void radeon_benchmark_log_results(int n, unsigned size,
+ unsigned int time,
+ unsigned sdomain, unsigned ddomain,
+ char *kind)
+{
+ unsigned int throughput = (n * (size >> 10)) / time;
+ DRM_INFO("radeon: %s %u bo moves of %u kB from"
+ " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
+ kind, n, size >> 10, sdomain, ddomain, time,
+ throughput * 8, throughput);
+}
+
+static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
+ unsigned sdomain, unsigned ddomain)
{
struct radeon_bo *dobj = NULL;
struct radeon_bo *sobj = NULL;
- struct radeon_fence *fence = NULL;
uint64_t saddr, daddr;
- unsigned long start_jiffies;
- unsigned long end_jiffies;
- unsigned long time;
- unsigned i, n, size;
- int r;
+ int r, n;
+ unsigned int time;
- size = bsize;
- n = 1024;
+ n = RADEON_BENCHMARK_ITERATIONS;
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj);
if (r) {
goto out_cleanup;
@@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
}
/* r100 doesn't have dma engine so skip the test */
- if (rdev->asic->copy_dma) {
-
- start_jiffies = jiffies;
- for (i = 0; i < n; i++) {
- r = radeon_fence_create(rdev, &fence);
- if (r) {
- goto out_cleanup;
- }
-
- r = radeon_copy_dma(rdev, saddr, daddr,
- size / RADEON_GPU_PAGE_SIZE, fence);
-
- if (r) {
- goto out_cleanup;
- }
- r = radeon_fence_wait(fence, false);
- if (r) {
- goto out_cleanup;
- }
- radeon_fence_unref(&fence);
- }
- end_jiffies = jiffies;
- time = end_jiffies - start_jiffies;
- time = jiffies_to_msecs(time);
- if (time > 0) {
- i = ((n * size) >> 10) / time;
- printk(KERN_INFO "radeon: dma %u bo moves of %ukb from"
- " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n",
- n, size >> 10,
- sdomain, ddomain, time,
- i, i * 1000, (i * 1000) / 1024);
- }
- }
-
- start_jiffies = jiffies;
- for (i = 0; i < n; i++) {
- r = radeon_fence_create(rdev, &fence);
- if (r) {
- goto out_cleanup;
- }
- r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence);
- if (r) {
- goto out_cleanup;
- }
- r = radeon_fence_wait(fence, false);
- if (r) {
+ /* also, VRAM-to-VRAM test doesn't make much sense for DMA */
+ /* skip it as well if domains are the same */
+ if ((rdev->asic->copy_dma) && (sdomain != ddomain)) {
+ time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+ RADEON_BENCHMARK_COPY_DMA, n);
+ if (time < 0)
goto out_cleanup;
- }
- radeon_fence_unref(&fence);
- }
- end_jiffies = jiffies;
- time = end_jiffies - start_jiffies;
- time = jiffies_to_msecs(time);
- if (time > 0) {
- i = ((n * size) >> 10) / time;
- printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d"
- " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
- sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+ if (time > 0)
+ radeon_benchmark_log_results(n, size, time,
+ sdomain, ddomain, "dma");
}
+
+ time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+ RADEON_BENCHMARK_COPY_BLIT, n);
+ if (time < 0)
+ goto out_cleanup;
+ if (time > 0)
+ radeon_benchmark_log_results(n, size, time,
+ sdomain, ddomain, "blit");
+
out_cleanup:
if (sobj) {
r = radeon_bo_reserve(sobj, false);
@@ -143,18 +164,92 @@ out_cleanup:
}
radeon_bo_unref(&dobj);
}
- if (fence) {
- radeon_fence_unref(&fence);
- }
+
if (r) {
- printk(KERN_WARNING "Error while benchmarking BO move.\n");
+ DRM_ERROR("Error while benchmarking BO move.\n");
}
}
-void radeon_benchmark(struct radeon_device *rdev)
+void radeon_benchmark(struct radeon_device *rdev, int test_number)
{
- radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
- RADEON_GEM_DOMAIN_VRAM);
- radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
- RADEON_GEM_DOMAIN_GTT);
+ int i;
+ int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = {
+ 640 * 480 * 4,
+ 720 * 480 * 4,
+ 800 * 600 * 4,
+ 848 * 480 * 4,
+ 1024 * 768 * 4,
+ 1152 * 768 * 4,
+ 1280 * 720 * 4,
+ 1280 * 800 * 4,
+ 1280 * 854 * 4,
+ 1280 * 960 * 4,
+ 1280 * 1024 * 4,
+ 1440 * 900 * 4,
+ 1400 * 1050 * 4,
+ 1680 * 1050 * 4,
+ 1600 * 1200 * 4,
+ 1920 * 1080 * 4,
+ 1920 * 1200 * 4
+ };
+
+ switch (test_number) {
+ case 1:
+ /* simple test, VRAM to GTT and GTT to VRAM */
+ radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_DOMAIN_VRAM);
+ radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_GTT);
+ break;
+ case 2:
+ /* simple test, VRAM to VRAM */
+ radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_VRAM);
+ break;
+ case 3:
+ /* GTT to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 65536; i <<= 1)
+ radeon_benchmark_move(rdev, i*1024,
+ RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_DOMAIN_VRAM);
+ break;
+ case 4:
+ /* VRAM to GTT, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 65536; i <<= 1)
+ radeon_benchmark_move(rdev, i*1024,
+ RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_GTT);
+ break;
+ case 5:
+ /* VRAM to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 65536; i <<= 1)
+ radeon_benchmark_move(rdev, i*1024,
+ RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_VRAM);
+ break;
+ case 6:
+ /* GTT to VRAM, buffer size sweep, common modes */
+ for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+ radeon_benchmark_move(rdev, common_modes[i],
+ RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_DOMAIN_VRAM);
+ break;
+ case 7:
+ /* VRAM to GTT, buffer size sweep, common modes */
+ for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+ radeon_benchmark_move(rdev, common_modes[i],
+ RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_GTT);
+ break;
+ case 8:
+ /* VRAM to VRAM, buffer size sweep, common modes */
+ for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+ radeon_benchmark_move(rdev, common_modes[i],
+ RADEON_GEM_DOMAIN_VRAM,
+ RADEON_GEM_DOMAIN_VRAM);
+ break;
+
+ default:
+ DRM_ERROR("Unknown benchmark\n");
+ }
}
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 63675241c7ff..8bf83c4b4147 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -620,8 +620,8 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
i2c.y_data_mask = 0x80;
} else {
/* default masks for ddc pads */
- i2c.mask_clk_mask = RADEON_GPIO_EN_1;
- i2c.mask_data_mask = RADEON_GPIO_EN_0;
+ i2c.mask_clk_mask = RADEON_GPIO_MASK_1;
+ i2c.mask_data_mask = RADEON_GPIO_MASK_0;
i2c.a_clk_mask = RADEON_GPIO_A_1;
i2c.a_data_mask = RADEON_GPIO_A_0;
i2c.en_clk_mask = RADEON_GPIO_EN_1;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 449c3d8c6836..dec6cbe6a0a6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -724,6 +724,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
dret = radeon_ddc_probe(radeon_connector,
radeon_connector->requires_extended_probe);
if (dret) {
+ radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
kfree(radeon_connector->edid);
radeon_connector->edid = NULL;
@@ -750,12 +751,21 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
} else {
/* if we aren't forcing don't do destructive polling */
- if (!force)
- return connector->status;
+ if (!force) {
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (radeon_connector->detected_by_load)
+ return connector->status;
+ else
+ return ret;
+ }
if (radeon_connector->dac_load_detect && encoder) {
encoder_funcs = encoder->helper_private;
ret = encoder_funcs->detect(encoder, connector);
+ if (ret == connector_status_connected)
+ radeon_connector->detected_by_load = true;
}
}
@@ -897,6 +907,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
dret = radeon_ddc_probe(radeon_connector,
radeon_connector->requires_extended_probe);
if (dret) {
+ radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
kfree(radeon_connector->edid);
radeon_connector->edid = NULL;
@@ -959,8 +970,18 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
if ((ret == connector_status_connected) && (radeon_connector->use_digital == true))
goto out;
+ /* DVI-D and HDMI-A are digital only */
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+ goto out;
+
+ /* if we aren't forcing don't do destructive polling */
if (!force) {
- ret = connector->status;
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (radeon_connector->detected_by_load)
+ ret = connector->status;
goto out;
}
@@ -984,6 +1005,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
ret = encoder_funcs->detect(encoder, connector);
if (ret == connector_status_connected) {
radeon_connector->use_digital = false;
+ radeon_connector->detected_by_load = true;
}
}
break;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b51e15725c6e..c33bc914d93d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -750,14 +750,15 @@ int radeon_device_init(struct radeon_device *rdev,
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
- * IGP - can handle 40-bits (in theory)
+ * IGP - can handle 40-bits
* AGP - generally dma32 is safest
- * PCI - only dma32
+ * PCI - dma32 for legacy pci gart, 40 bits on newer asics
*/
rdev->need_dma32 = false;
if (rdev->flags & RADEON_IS_AGP)
rdev->need_dma32 = true;
- if (rdev->flags & RADEON_IS_PCI)
+ if ((rdev->flags & RADEON_IS_PCI) &&
+ (rdev->family < CHIP_RS400))
rdev->need_dma32 = true;
dma_bits = rdev->need_dma32 ? 32 : 40;
@@ -817,7 +818,7 @@ int radeon_device_init(struct radeon_device *rdev,
radeon_test_moves(rdev);
}
if (radeon_benchmarking) {
- radeon_benchmark(rdev);
+ radeon_benchmark(rdev, radeon_benchmarking);
}
return 0;
}
@@ -981,7 +982,7 @@ struct radeon_debugfs {
struct drm_info_list *files;
unsigned num_files;
};
-static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES];
+static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
static unsigned _radeon_debugfs_count = 0;
int radeon_debugfs_add_files(struct radeon_device *rdev,
@@ -996,14 +997,17 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
return 0;
}
}
- if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) {
- DRM_ERROR("Reached maximum number of debugfs files.\n");
- DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n");
+
+ i = _radeon_debugfs_count + 1;
+ if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
+ DRM_ERROR("Reached maximum number of debugfs components.\n");
+ DRM_ERROR("Report so we increase "
+ "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
return -EINVAL;
}
_radeon_debugfs[_radeon_debugfs_count].files = files;
_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
- _radeon_debugfs_count++;
+ _radeon_debugfs_count = i;
#if defined(CONFIG_DEBUG_FS)
drm_debugfs_create_files(files, nfiles,
rdev->ddev->control->debugfs_root,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3475a09f946b..76ec0e9ed8ae 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -263,7 +263,7 @@ retry:
*/
if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
/* good news we believe it's a lockup */
- WARN(1, "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+ printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
fence->seq, seq);
/* FIXME: what should we do ? marking everyone
* as signaled for now
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a533f52fd163..fdc3a9a54bf8 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -142,7 +142,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
u64 page_base;
if (!rdev->gart.ready) {
- WARN(1, "trying to unbind memory to unitialized GART !\n");
+ WARN(1, "trying to unbind memory from uninitialized GART !\n");
return;
}
t = offset / RADEON_GPU_PAGE_SIZE;
@@ -174,7 +174,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
int i, j;
if (!rdev->gart.ready) {
- WARN(1, "trying to bind memory to unitialized GART !\n");
+ WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
}
t = offset / RADEON_GPU_PAGE_SIZE;
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 6c111c1fa3f9..02cb7da4124d 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -81,8 +81,9 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_e
/* bit banging i2c */
-static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
+static int pre_xfer(struct i2c_adapter *i2c_adap)
{
+ struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
struct radeon_device *rdev = i2c->dev->dev_private;
struct radeon_i2c_bus_rec *rec = &i2c->rec;
uint32_t temp;
@@ -137,19 +138,30 @@ static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
WREG32(rec->en_data_reg, temp);
/* mask the gpio pins for software use */
- temp = RREG32(rec->mask_clk_reg);
- if (lock_state)
- temp |= rec->mask_clk_mask;
- else
- temp &= ~rec->mask_clk_mask;
+ temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
WREG32(rec->mask_clk_reg, temp);
temp = RREG32(rec->mask_clk_reg);
+ temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+ WREG32(rec->mask_data_reg, temp);
temp = RREG32(rec->mask_data_reg);
- if (lock_state)
- temp |= rec->mask_data_mask;
- else
- temp &= ~rec->mask_data_mask;
+
+ return 0;
+}
+
+static void post_xfer(struct i2c_adapter *i2c_adap)
+{
+ struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct radeon_device *rdev = i2c->dev->dev_private;
+ struct radeon_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t temp;
+
+ /* unmask the gpio pins for software use */
+ temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+ WREG32(rec->mask_clk_reg, temp);
+ temp = RREG32(rec->mask_clk_reg);
+
+ temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
WREG32(rec->mask_data_reg, temp);
temp = RREG32(rec->mask_data_reg);
}
@@ -209,22 +221,6 @@ static void set_data(void *i2c_priv, int data)
WREG32(rec->en_data_reg, val);
}
-static int pre_xfer(struct i2c_adapter *i2c_adap)
-{
- struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
- radeon_i2c_do_lock(i2c, 1);
-
- return 0;
-}
-
-static void post_xfer(struct i2c_adapter *i2c_adap)
-{
- struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
- radeon_i2c_do_lock(i2c, 0);
-}
-
/* hw i2c */
static u32 radeon_get_i2c_prescale(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746bd51b7..00da38424dfc 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
}
}
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
{
u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb428d09..b37ec0f1413a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
*v_sync_strt_wid = tmp;
}
-static inline int get_post_div(int value)
+static int get_post_div(int value)
{
int post_div;
switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 68820f5f6303..ed0178f03235 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -447,6 +447,7 @@ struct radeon_connector {
struct edid *edid;
void *con_priv;
bool dac_load_detect;
+ bool detected_by_load; /* if the connection status was determined by load */
uint16_t connector_object_id;
struct radeon_hpd hpd;
struct radeon_router router;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1b1b6e..1c851521f458 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
}
return 0;
}
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
+{
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+ if (unlikely(r != 0))
+ return r;
+ spin_lock(&bo->tbo.bdev->fence_lock);
+ if (mem_type)
+ *mem_type = bo->tbo.mem.mem_type;
+ if (bo->tbo.sync_obj)
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+ spin_unlock(&bo->tbo.bdev->fence_lock);
+ ttm_bo_unreserve(&bo->tbo);
+ return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo: bo structure
+ * @no_wait: don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+ return r;
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13628f2..b07f0f9b8627 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
return 0;
}
-/**
- * radeon_bo_reserve - reserve bo
- * @bo: bo structure
- * @no_wait: don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
- int r;
-
- r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
- return r;
- }
- return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
static inline void radeon_bo_unreserve(struct radeon_bo *bo)
{
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
return bo->tbo.addr_space_offset;
}
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
- bool no_wait)
-{
- int r;
-
- r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
- if (unlikely(r != 0))
- return r;
- spin_lock(&bo->tbo.bdev->fence_lock);
- if (mem_type)
- *mem_type = bo->tbo.mem.mem_type;
- if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
- spin_unlock(&bo->tbo.bdev->fence_lock);
- ttm_bo_unreserve(&bo->tbo);
- return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+ bool no_wait);
extern int radeon_bo_create(struct radeon_device *rdev,
unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 08c0233db1b8..49d58202202c 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -35,6 +35,44 @@
int radeon_debugfs_ib_init(struct radeon_device *rdev);
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+ struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+ u32 pg_idx, pg_offset;
+ u32 idx_value = 0;
+ int new_page;
+
+ pg_idx = (idx * 4) / PAGE_SIZE;
+ pg_offset = (idx * 4) % PAGE_SIZE;
+
+ if (ibc->kpage_idx[0] == pg_idx)
+ return ibc->kpage[0][pg_offset/4];
+ if (ibc->kpage_idx[1] == pg_idx)
+ return ibc->kpage[1][pg_offset/4];
+
+ new_page = radeon_cs_update_pages(p, pg_idx);
+ if (new_page < 0) {
+ p->parser_error = new_page;
+ return 0;
+ }
+
+ idx_value = ibc->kpage[new_page][pg_offset/4];
+ return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+ if (rdev->cp.count_dw <= 0) {
+ DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+ }
+#endif
+ rdev->cp.ring[rdev->cp.wptr++] = v;
+ rdev->cp.wptr &= rdev->cp.ptr_mask;
+ rdev->cp.count_dw--;
+ rdev->cp.ring_free_dw--;
+}
+
void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
{
struct radeon_ib *ib, *n;
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea73b7c5..e8422ae7fe74 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
return 0;
}
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
- dev_priv,
- struct drm_file *file_priv,
- drm_radeon_kcmd_buffer_t *
- cmdbuf,
- unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+ dev_priv,
+ struct drm_file *file_priv,
+ drm_radeon_kcmd_buffer_t *
+ cmdbuf,
+ unsigned int *cmdsz)
{
u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
* CP hardware state programming functions
*/
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
- struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+ struct drm_clip_rect * box)
{
RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index aa6a66eeb4ec..89a6e1ecea8d 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -182,6 +182,9 @@ int rs400_gart_enable(struct radeon_device *rdev)
/* Enable gart */
WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg));
rs400_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 4b5d0e6974a8..9320dd6404f6 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -484,6 +484,9 @@ static int rs600_gart_enable(struct radeon_device *rdev)
tmp = RREG32_MC(R_000009_MC_CNTL1);
WREG32_MC(R_000009_MC_CNTL1, (tmp | S_000009_ENABLE_PAGE_TABLES(1)));
rs600_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b13c2eedc321..87cc1feee3ac 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -161,6 +161,9 @@ int rv770_pcie_gart_enable(struct radeon_device *rdev)
WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
r600_pcie_gart_tlb_flush(rdev);
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(rdev->mc.gtt_size >> 20),
+ (unsigned long long)rdev->gart.table_addr);
rdev->gart.ready = true;
return 0;
}
@@ -1184,8 +1187,6 @@ int rv770_resume(struct radeon_device *rdev)
int rv770_suspend(struct radeon_device *rdev)
{
- int r;
-
r600_audio_fini(rdev);
/* FIXME: we should wait for ring to be empty */
r700_cp_stop(rdev);
@@ -1193,14 +1194,8 @@ int rv770_suspend(struct radeon_device *rdev)
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
rv770_pcie_gart_disable(rdev);
- /* unpin shaders bo */
- if (rdev->r600_blit.shader_obj) {
- r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
- if (likely(r == 0)) {
- radeon_bo_unpin(rdev->r600_blit.shader_obj);
- radeon_bo_unreserve(rdev->r600_blit.shader_obj);
- }
- }
+ r600_blit_suspend(rdev);
+
return 0;
}