diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/r600_blit_kms.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r600_blit_kms.c | 359 |
1 files changed, 151 insertions, 208 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 9aa74c3f8cb6..c4cf1308d4a1 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -42,6 +42,9 @@ #define COLOR_5_6_5 0x8 #define COLOR_8_8_8_8 0x1a +#define RECT_UNIT_H 32 +#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) + /* emits 21 on rv770+, 23 on r600 */ static void set_render_target(struct radeon_device *rdev, int format, @@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format, if (h < 8) h = 8; - cb_color_info = ((format << 2) | (1 << 27) | (1 << 8)); + cb_color_info = CB_FORMAT(format) | + CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | + CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1; @@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) { u32 sq_vtx_constant_word2; - sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | + SQ_VTXC_STRIDE(16); #ifdef __BIG_ENDIAN - sq_vtx_constant_word2 |= (2 << 30); + sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); #endif radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); @@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev, if (h < 1) h = 1; - sq_tex_resource_word0 = (1 << 0) | (1 << 3); - sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | - ((w - 1) << 19)); + sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) | + S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); + sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) | + S_038000_TEX_WIDTH(w - 1); - sq_tex_resource_word1 = (format << 26); - sq_tex_resource_word1 |= ((h - 1) << 0); + sq_tex_resource_word1 = S_038004_DATA_FORMAT(format); + sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1); - sq_tex_resource_word4 = ((1 << 14) | - (0 << 16) | - (1 << 19) | - (2 << 22) | - (3 << 25)); + sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) | + S_038010_DST_SEL_X(SQ_SEL_X) | + S_038010_DST_SEL_Y(SQ_SEL_Y) | + S_038010_DST_SEL_Z(SQ_SEL_Z) | + S_038010_DST_SEL_W(SQ_SEL_W); radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); radeon_ring_write(rdev, 0); @@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev) radeon_ring_write(rdev, sq_stack_resource_mgmt_2); } -static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction; @@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev) u32 packet2s[16]; int num_packet2s = 0; + rdev->r600_blit.primitives.set_render_target = set_render_target; + rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; + rdev->r600_blit.primitives.set_shaders = set_shaders; + rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; + rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; + rdev->r600_blit.primitives.set_scissors = set_scissors; + rdev->r600_blit.primitives.draw_auto = draw_auto; + rdev->r600_blit.primitives.set_default_state = set_default_state; + + rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */ + rdev->r600_blit.ring_size_common += 5; /* done copy */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */ + + rdev->r600_blit.ring_size_per_loop = 76; + /* set_render_target emits 2 extra dwords on rv6xx */ + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) + rdev->r600_blit.ring_size_per_loop += 2; + + rdev->r600_blit.max_dim = 8192; + /* pin copy shader into vram if already initialized */ if (rdev->r600_blit.shader_obj) goto done; @@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); } -int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +static unsigned r600_blit_create_rect(unsigned num_gpu_pages, + int *width, int *height, int max_dim) +{ + unsigned max_pages; + unsigned pages = num_gpu_pages; + int w, h; + + if (num_gpu_pages == 0) { + /* not supposed to be called with no pages, but just in case */ + h = 0; + w = 0; + pages = 0; + WARN_ON(1); + } else { + int rect_order = 2; + h = RECT_UNIT_H; + while (num_gpu_pages / rect_order) { + h *= 2; + rect_order *= 4; + if (h >= max_dim) { + h = max_dim; + break; + } + } + max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H); + if (pages > max_pages) + pages = max_pages; + w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; + w = (w / RECT_UNIT_W) * RECT_UNIT_W; + pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); + BUG_ON(pages == 0); + } + + + DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); + + /* return width and height only of the caller wants it */ + if (height) + *height = h; + if (width) + *width = w; + + return pages; +} + + +int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages) { int r; - int ring_size, line_size; - int max_size; - /* loops of emits 64 + fence emit possible */ - int dwords_per_loop = 76, num_loops; + int ring_size; + int num_loops = 0; + int dwords_per_loop = rdev->r600_blit.ring_size_per_loop; r = r600_vb_ib_get(rdev); if (r) return r; - /* set_render_target emits 2 extra dwords on rv6xx */ - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) - dwords_per_loop += 2; - - /* 8 bpp vs 32 bpp for xfer unit */ - if (size_bytes & 3) - line_size = 8192; - else - line_size = 8192*4; - - max_size = 8192 * line_size; + /* num loops */ + while (num_gpu_pages) { + num_gpu_pages -= + r600_blit_create_rect(num_gpu_pages, NULL, NULL, + rdev->r600_blit.max_dim); + num_loops++; + } - /* major loops cover the max size transfer */ - num_loops = ((size_bytes + max_size) / max_size); - /* minor loops cover the extra non aligned bits */ - num_loops += ((size_bytes % line_size) ? 1 : 0); /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; - /* set default + shaders */ - ring_size += 40; /* shaders + def state */ - ring_size += 10; /* fence emit for VB IB */ - ring_size += 5; /* done copy */ - ring_size += 10; /* fence emit for done copy */ + ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring_size); if (r) return r; - set_default_state(rdev); /* 14 */ - set_shaders(rdev); /* 26 */ + rdev->r600_blit.primitives.set_default_state(rdev); + rdev->r600_blit.primitives.set_shaders(rdev); return 0; } @@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes) + unsigned num_gpu_pages) { - int max_bytes; u64 vb_gpu_addr; u32 *vb; - DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, - size_bytes, rdev->r600_blit.vb_used); + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", + src_gpu_addr, dst_gpu_addr, + num_gpu_pages, rdev->r600_blit.vb_used); vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); - if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { - max_bytes = 8192; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = src_gpu_addr & 255; - int dst_x = dst_gpu_addr & 255; - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - - vb[0] = i2f(dst_x); - vb[1] = 0; - vb[2] = i2f(src_x); - vb[3] = 0; - - vb[4] = i2f(dst_x); - vb[5] = i2f(h); - vb[6] = i2f(src_x); - vb[7] = i2f(h); - - vb[8] = i2f(dst_x + cur_size); - vb[9] = i2f(h); - vb[10] = i2f(src_x + cur_size); - vb[11] = i2f(h); - - /* src 9 */ - set_tex_resource(rdev, FMT_8, - src_x + cur_size, h, src_x + cur_size, - src_gpu_addr); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - /* dst 23 */ - set_render_target(rdev, COLOR_8, - dst_x + cur_size, h, - dst_gpu_addr); + while (num_gpu_pages) { + int w, h; + unsigned size_in_bytes; + unsigned pages_per_loop = + r600_blit_create_rect(num_gpu_pages, &w, &h, + rdev->r600_blit.max_dim); - /* scissors 12 */ - set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; + DRM_DEBUG("rectangle w=%d h=%d\n", w, h); - /* 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); } - } else { - max_bytes = 8192 * 4; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = (src_gpu_addr & 255); - int dst_x = (dst_gpu_addr & 255); - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - vb[0] = i2f(dst_x / 4); - vb[1] = 0; - vb[2] = i2f(src_x / 4); - vb[3] = 0; - - vb[4] = i2f(dst_x / 4); - vb[5] = i2f(h); - vb[6] = i2f(src_x / 4); - vb[7] = i2f(h); - - vb[8] = i2f((dst_x + cur_size) / 4); - vb[9] = i2f(h); - vb[10] = i2f((src_x + cur_size) / 4); - vb[11] = i2f(h); - - /* src 9 */ - set_tex_resource(rdev, FMT_8_8_8_8, - (src_x + cur_size) / 4, - h, (src_x + cur_size) / 4, - src_gpu_addr); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - - /* dst 23 */ - set_render_target(rdev, COLOR_8_8_8_8, - (dst_x + cur_size) / 4, h, - dst_gpu_addr); - - /* scissors 12 */ - set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); - - /* Vertex buffer setup 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - /* 78 ring dwords per loop */ - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } + vb[0] = 0; + vb[1] = 0; + vb[2] = 0; + vb[3] = 0; + + vb[4] = 0; + vb[5] = i2f(h); + vb[6] = 0; + vb[7] = i2f(h); + + vb[8] = i2f(w); + vb[9] = i2f(h); + vb[10] = i2f(w); + vb[11] = i2f(h); + + rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8, + w, h, w, src_gpu_addr); + rdev->r600_blit.primitives.cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, + size_in_bytes, src_gpu_addr); + rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8, + w, h, dst_gpu_addr); + rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h); + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr); + rdev->r600_blit.primitives.draw_auto(rdev); + rdev->r600_blit.primitives.cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + size_in_bytes, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 4*12; + src_gpu_addr += size_in_bytes; + dst_gpu_addr += size_in_bytes; + num_gpu_pages -= pages_per_loop; } } - |