diff options
Diffstat (limited to 'drivers/gpu/drm/radeon')
35 files changed, 869 insertions, 1035 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c4ffa14fb2f4..ed406e8404a3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -39,7 +39,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev); void evergreen_fini(struct radeon_device *rdev); -static void evergreen_pcie_gen2_enable(struct radeon_device *rdev); +void evergreen_pcie_gen2_enable(struct radeon_device *rdev); void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev) { @@ -935,6 +935,9 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT1_CNTL, 0); evergreen_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } @@ -2586,7 +2589,7 @@ int evergreen_irq_set(struct radeon_device *rdev) return 0; } -static inline void evergreen_irq_ack(struct radeon_device *rdev) +static void evergreen_irq_ack(struct radeon_device *rdev) { u32 tmp; @@ -2697,7 +2700,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev) r600_rlc_stop(rdev); } -static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev) +static u32 evergreen_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; @@ -3003,8 +3006,7 @@ static int evergreen_startup(struct radeon_device *rdev) int r; /* enable pcie gen2 link */ - if (!ASIC_IS_DCE5(rdev)) - evergreen_pcie_gen2_enable(rdev); + evergreen_pcie_gen2_enable(rdev); if (ASIC_IS_DCE5(rdev)) { if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { @@ -3041,7 +3043,7 @@ static int evergreen_startup(struct radeon_device *rdev) r = evergreen_blit_init(rdev); if (r) { - evergreen_blit_fini(rdev); + r600_blit_fini(rdev); rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } @@ -3107,45 +3109,14 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { - int r; - /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); rdev->cp.ready = false; evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); + r600_blit_suspend(rdev); - /* unpin shaders bo */ - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (likely(r == 0)) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - - return 0; -} - -int evergreen_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, - uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence *fence) -{ - int r; - - mutex_lock(&rdev->r600_blit.mutex); - rdev->r600_blit.vb_ib = NULL; - r = evergreen_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE); - if (r) { - if (rdev->r600_blit.vb_ib) - radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); - mutex_unlock(&rdev->r600_blit.mutex); - return r; - } - evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE); - evergreen_blit_done_copy(rdev, fence); - mutex_unlock(&rdev->r600_blit.mutex); return 0; } @@ -3257,7 +3228,7 @@ int evergreen_init(struct radeon_device *rdev) void evergreen_fini(struct radeon_device *rdev) { - evergreen_blit_fini(rdev); + r600_blit_fini(rdev); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); @@ -3273,7 +3244,7 @@ void evergreen_fini(struct radeon_device *rdev) rdev->bios = NULL; } -static void evergreen_pcie_gen2_enable(struct radeon_device *rdev) +void evergreen_pcie_gen2_enable(struct radeon_device *rdev) { u32 link_width_cntl, speed_cntl; diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 2eb251858e72..dcf11bbc06d9 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format, if (h < 8) h = 8; - cb_color_info = ((format << 2) | (1 << 24) | (1 << 8)); + cb_color_info = CB_FORMAT(format) | + CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | + CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1; @@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format, radeon_ring_write(rdev, slice); radeon_ring_write(rdev, 0); radeon_ring_write(rdev, cb_color_info); - radeon_ring_write(rdev, (1 << 4)); + radeon_ring_write(rdev, 0); radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); radeon_ring_write(rdev, 0); radeon_ring_write(rdev, 0); @@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) u32 sq_vtx_constant_word2, sq_vtx_constant_word3; /* high addr, stride */ - sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | + SQ_VTXC_STRIDE(16); #ifdef __BIG_ENDIAN - sq_vtx_constant_word2 |= (2 << 30); + sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); #endif /* xyzw swizzles */ - sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12); + sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) | + SQ_VTCX_SEL_Y(SQ_SEL_Y) | + SQ_VTCX_SEL_Z(SQ_SEL_Z) | + SQ_VTCX_SEL_W(SQ_SEL_W); radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); radeon_ring_write(rdev, 0x580); @@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) radeon_ring_write(rdev, 0); radeon_ring_write(rdev, 0); radeon_ring_write(rdev, 0); - radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER)); if ((rdev->family == CHIP_CEDAR) || (rdev->family == CHIP_PALM) || @@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev, if (h < 1) h = 1; - sq_tex_resource_word0 = (1 << 0); /* 2D */ + sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D); sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | ((w - 1) << 18)); - sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28); + sq_tex_resource_word1 = ((h - 1) << 0) | + TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1); /* xyzw swizzles */ - sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); + sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) | + TEX_DST_SEL_Y(SQ_SEL_Y) | + TEX_DST_SEL_Z(SQ_SEL_Z) | + TEX_DST_SEL_W(SQ_SEL_W); - sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30); + sq_tex_resource_word7 = format | + S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE); radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); radeon_ring_write(rdev, 0); @@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev) } -static inline uint32_t i2f(uint32_t input) -{ - u32 result, i, exponent, fraction; - - if ((input & 0x3fff) == 0) - result = 0; /* 0 is a special case */ - else { - exponent = 140; /* exponent biased by 127; */ - fraction = (input & 0x3fff) << 10; /* cheat and only - handle numbers below 2^^15 */ - for (i = 0; i < 14; i++) { - if (fraction & 0x800000) - break; - else { - fraction = fraction << 1; /* keep - shifting left until top bit = 1 */ - exponent = exponent - 1; - } - } - result = exponent << 23 | (fraction & 0x7fffff); /* mask - off top bit; assumed 1 */ - } - return result; -} - int evergreen_blit_init(struct radeon_device *rdev) { u32 obj_size; @@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev) u32 packet2s[16]; int num_packet2s = 0; + rdev->r600_blit.primitives.set_render_target = set_render_target; + rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; + rdev->r600_blit.primitives.set_shaders = set_shaders; + rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; + rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; + rdev->r600_blit.primitives.set_scissors = set_scissors; + rdev->r600_blit.primitives.draw_auto = draw_auto; + rdev->r600_blit.primitives.set_default_state = set_default_state; + + rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */ + rdev->r600_blit.ring_size_common += 5; /* done copy */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */ + + rdev->r600_blit.ring_size_per_loop = 74; + + rdev->r600_blit.max_dim = 16384; + /* pin copy shader into vram if already initialized */ if (rdev->r600_blit.shader_obj) goto done; @@ -712,277 +716,3 @@ done: radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } - -void evergreen_blit_fini(struct radeon_device *rdev) -{ - int r; - - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - if (rdev->r600_blit.shader_obj == NULL) - return; - /* If we can't reserve the bo, unref should be enough to destroy - * it when it becomes idle. - */ - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (!r) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - radeon_bo_unref(&rdev->r600_blit.shader_obj); -} - -static int evergreen_vb_ib_get(struct radeon_device *rdev) -{ - int r; - r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); - if (r) { - DRM_ERROR("failed to get IB for vertex buffer\n"); - return r; - } - - rdev->r600_blit.vb_total = 64*1024; - rdev->r600_blit.vb_used = 0; - return 0; -} - -static void evergreen_vb_ib_put(struct radeon_device *rdev) -{ - radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); - radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); -} - -int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) -{ - int r; - int ring_size, line_size; - int max_size; - /* loops of emits + fence emit possible */ - int dwords_per_loop = 74, num_loops; - - r = evergreen_vb_ib_get(rdev); - if (r) - return r; - - /* 8 bpp vs 32 bpp for xfer unit */ - if (size_bytes & 3) - line_size = 8192; - else - line_size = 8192 * 4; - - max_size = 8192 * line_size; - - /* major loops cover the max size transfer */ - num_loops = ((size_bytes + max_size) / max_size); - /* minor loops cover the extra non aligned bits */ - num_loops += ((size_bytes % line_size) ? 1 : 0); - /* calculate number of loops correctly */ - ring_size = num_loops * dwords_per_loop; - /* set default + shaders */ - ring_size += 55; /* shaders + def state */ - ring_size += 10; /* fence emit for VB IB */ - ring_size += 5; /* done copy */ - ring_size += 10; /* fence emit for done copy */ - r = radeon_ring_lock(rdev, ring_size); - if (r) - return r; - - set_default_state(rdev); /* 36 */ - set_shaders(rdev); /* 16 */ - return 0; -} - -void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) -{ - int r; - - if (rdev->r600_blit.vb_ib) - evergreen_vb_ib_put(rdev); - - if (fence) - r = radeon_fence_emit(rdev, fence); - - radeon_ring_unlock_commit(rdev); -} - -void evergreen_kms_blit_copy(struct radeon_device *rdev, - u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes) -{ - int max_bytes; - u64 vb_gpu_addr; - u32 *vb; - - DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, - size_bytes, rdev->r600_blit.vb_used); - vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); - if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { - max_bytes = 8192; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = src_gpu_addr & 255; - int dst_x = dst_gpu_addr & 255; - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - - vb[0] = i2f(dst_x); - vb[1] = 0; - vb[2] = i2f(src_x); - vb[3] = 0; - - vb[4] = i2f(dst_x); - vb[5] = i2f(h); - vb[6] = i2f(src_x); - vb[7] = i2f(h); - - vb[8] = i2f(dst_x + cur_size); - vb[9] = i2f(h); - vb[10] = i2f(src_x + cur_size); - vb[11] = i2f(h); - - /* src 10 */ - set_tex_resource(rdev, FMT_8, - src_x + cur_size, h, src_x + cur_size, - src_gpu_addr); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - - - /* dst 17 */ - set_render_target(rdev, COLOR_8, - dst_x + cur_size, h, - dst_gpu_addr); - - /* scissors 12 */ - set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); - - /* 15 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } - } else { - max_bytes = 8192 * 4; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = (src_gpu_addr & 255); - int dst_x = (dst_gpu_addr & 255); - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - - vb[0] = i2f(dst_x / 4); - vb[1] = 0; - vb[2] = i2f(src_x / 4); - vb[3] = 0; - - vb[4] = i2f(dst_x / 4); - vb[5] = i2f(h); - vb[6] = i2f(src_x / 4); - vb[7] = i2f(h); - - vb[8] = i2f((dst_x + cur_size) / 4); - vb[9] = i2f(h); - vb[10] = i2f((src_x + cur_size) / 4); - vb[11] = i2f(h); - - /* src 10 */ - set_tex_resource(rdev, FMT_8_8_8_8, - (src_x + cur_size) / 4, - h, (src_x + cur_size) / 4, - src_gpu_addr); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - - /* dst 17 */ - set_render_target(rdev, COLOR_8_8_8_8, - (dst_x + cur_size) / 4, h, - dst_gpu_addr); - - /* scissors 12 */ - set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); - - /* Vertex buffer setup 15 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - /* 74 ring dwords per loop */ - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } - } -} - diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index a134790903d3..7fdfa8ea7570 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) track->db_s_write_bo = NULL; } -static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i) -{ - /* XXX fill in */ - return 0; -} - static int evergreen_cs_track_check(struct radeon_cs_parser *p) { struct evergreen_cs_track *track = p->track; @@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, } /** - * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc - * @parser: parser structure holding parsing context. - * - * Check next packet is relocation packet3, do bo validation and compute - * GPU offset using the provided start. - **/ -static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) -{ - struct radeon_cs_packet p3reloc; - int r; - - r = evergreen_cs_packet_parse(p, &p3reloc, p->idx); - if (r) { - return 0; - } - if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { - return 0; - } - return 1; -} - -/** * evergreen_cs_packet_next_vline() - parse userspace VLINE packet * @parser: parser structure holding parsing context. * @@ -414,7 +386,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p, * if register is safe. If register is not flag as safe this function * will test it against a list of register needind special handling. */ -static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track; struct radeon_cs_reloc *reloc; @@ -990,7 +962,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3 * This function will check that the resource has valid field and that * the texture and mipmap bo object are big enough to cover this resource. */ -static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx, +static int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx, struct radeon_bo *texture, struct radeon_bo *mipmap) { diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 7363d9dec909..b937c49054d9 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -941,11 +941,15 @@ #define CB_COLOR0_SLICE 0x28c68 #define CB_COLOR0_VIEW 0x28c6c #define CB_COLOR0_INFO 0x28c70 +# define CB_FORMAT(x) ((x) << 2) # define CB_ARRAY_MODE(x) ((x) << 8) # define ARRAY_LINEAR_GENERAL 0 # define ARRAY_LINEAR_ALIGNED 1 # define ARRAY_1D_TILED_THIN1 2 # define ARRAY_2D_TILED_THIN1 4 +# define CB_SOURCE_FORMAT(x) ((x) << 24) +# define CB_SF_EXPORT_FULL 0 +# define CB_SF_EXPORT_NORM 1 #define CB_COLOR0_ATTRIB 0x28c74 #define CB_COLOR0_DIM 0x28c78 /* only CB0-7 blocks have these regs */ @@ -1107,15 +1111,53 @@ #define CB_COLOR7_CLEAR_WORD3 0x28e3c #define SQ_TEX_RESOURCE_WORD0_0 0x30000 +# define TEX_DIM(x) ((x) << 0) +# define SQ_TEX_DIM_1D 0 +# define SQ_TEX_DIM_2D 1 +# define SQ_TEX_DIM_3D 2 +# define SQ_TEX_DIM_CUBEMAP 3 +# define SQ_TEX_DIM_1D_ARRAY 4 +# define SQ_TEX_DIM_2D_ARRAY 5 +# define SQ_TEX_DIM_2D_MSAA 6 +# define SQ_TEX_DIM_2D_ARRAY_MSAA 7 #define SQ_TEX_RESOURCE_WORD1_0 0x30004 # define TEX_ARRAY_MODE(x) ((x) << 28) #define SQ_TEX_RESOURCE_WORD2_0 0x30008 #define SQ_TEX_RESOURCE_WORD3_0 0x3000C #define SQ_TEX_RESOURCE_WORD4_0 0x30010 +# define TEX_DST_SEL_X(x) ((x) << 16) +# define TEX_DST_SEL_Y(x) ((x) << 19) +# define TEX_DST_SEL_Z(x) ((x) << 22) +# define TEX_DST_SEL_W(x) ((x) << 25) +# define SQ_SEL_X 0 +# define SQ_SEL_Y 1 +# define SQ_SEL_Z 2 +# define SQ_SEL_W 3 +# define SQ_SEL_0 4 +# define SQ_SEL_1 5 #define SQ_TEX_RESOURCE_WORD5_0 0x30014 #define SQ_TEX_RESOURCE_WORD6_0 0x30018 #define SQ_TEX_RESOURCE_WORD7_0 0x3001c +#define SQ_VTX_CONSTANT_WORD0_0 0x30000 +#define SQ_VTX_CONSTANT_WORD1_0 0x30004 +#define SQ_VTX_CONSTANT_WORD2_0 0x30008 +# define SQ_VTXC_BASE_ADDR_HI(x) ((x) << 0) +# define SQ_VTXC_STRIDE(x) ((x) << 8) +# define SQ_VTXC_ENDIAN_SWAP(x) ((x) << 30) +# define SQ_ENDIAN_NONE 0 +# define SQ_ENDIAN_8IN16 1 +# define SQ_ENDIAN_8IN32 2 +#define SQ_VTX_CONSTANT_WORD3_0 0x3000C +# define SQ_VTCX_SEL_X(x) ((x) << 3) +# define SQ_VTCX_SEL_Y(x) ((x) << 6) +# define SQ_VTCX_SEL_Z(x) ((x) << 9) +# define SQ_VTCX_SEL_W(x) ((x) << 12) +#define SQ_VTX_CONSTANT_WORD4_0 0x30010 +#define SQ_VTX_CONSTANT_WORD5_0 0x30014 +#define SQ_VTX_CONSTANT_WORD6_0 0x30018 +#define SQ_VTX_CONSTANT_WORD7_0 0x3001c + /* cayman 3D regs */ #define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B0 #define CAYMAN_DB_EQAA 0x28804 diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8c79ca97753d..556b7bc3418b 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -40,6 +40,7 @@ extern void evergreen_mc_program(struct radeon_device *rdev); extern void evergreen_irq_suspend(struct radeon_device *rdev); extern int evergreen_mc_init(struct radeon_device *rdev); extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); +extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev); #define EVERGREEN_PFP_UCODE_SIZE 1120 #define EVERGREEN_PM4_UCODE_SIZE 1376 @@ -967,6 +968,9 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT1_CNTL, 0); cayman_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } @@ -1341,6 +1345,9 @@ static int cayman_startup(struct radeon_device *rdev) { int r; + /* enable pcie gen2 link */ + evergreen_pcie_gen2_enable(rdev); + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) { r = ni_init_microcode(rdev); if (r) { @@ -1362,7 +1369,7 @@ static int cayman_startup(struct radeon_device *rdev) r = evergreen_blit_init(rdev); if (r) { - evergreen_blit_fini(rdev); + r600_blit_fini(rdev); rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } @@ -1423,21 +1430,13 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { - int r; - /* FIXME: we should wait for ring to be empty */ cayman_cp_enable(rdev, false); rdev->cp.ready = false; evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); cayman_pcie_gart_disable(rdev); - - /* unpin shaders bo */ - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (likely(r == 0)) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } + r600_blit_suspend(rdev); return 0; } @@ -1550,7 +1549,7 @@ int cayman_init(struct radeon_device *rdev) void cayman_fini(struct radeon_device *rdev) { - evergreen_blit_fini(rdev); + r600_blit_fini(rdev); cayman_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7fcdbbbf2979..8f8b8fa14357 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520); * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib->ptr[idx] = (value & 0x3fc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib->ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ @@ -513,6 +615,9 @@ int r100_pci_gart_enable(struct radeon_device *rdev) tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN; WREG32(RADEON_AIC_CNTL, tmp); r100_pci_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } @@ -588,7 +693,7 @@ void r100_irq_disable(struct radeon_device *rdev) WREG32(R_000044_GEN_INT_STATUS, tmp); } -static inline uint32_t r100_irq_ack(struct radeon_device *rdev) +static uint32_t r100_irq_ack(struct radeon_device *rdev) { uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS); uint32_t irq_mask = RADEON_SW_INT_TEST | @@ -3147,7 +3252,7 @@ void r100_bandwidth_update(struct radeon_device *rdev) } } -static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) { DRM_ERROR("pitch %d\n", t->pitch); DRM_ERROR("use_pitch %d\n", t->use_pitch); @@ -3965,3 +4070,43 @@ int r100_init(struct radeon_device *rdev) } return 0; } + +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) +{ + if (reg < rdev->rmmio_size) + return readl(((void __iomem *)rdev->rmmio) + reg); + else { + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + } +} + +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + if (reg < rdev->rmmio_size) + writel(v, ((void __iomem *)rdev->rmmio) + reg); + else { + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + } +} + +u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) +{ + if (reg < rdev->rio_mem_size) + return ioread32(rdev->rio_mem + reg); + else { + iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); + return ioread32(rdev->rio_mem + RADEON_MM_DATA); + } +} + +void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + if (reg < rdev->rio_mem_size) + iowrite32(v, rdev->rio_mem + reg); + else { + iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); + iowrite32(v, rdev->rio_mem + RADEON_MM_DATA); + } +} diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h index 686f9dc5d4bd..6a603b378adb 100644 --- a/drivers/gpu/drm/radeon/r100_track.h +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg); - - -static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib->ptr[idx] = (value & 0x3fc00000) | tmp; - return 0; -} - -static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib->ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 0x7F; - } - return r; -} +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg); +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 55a7f190027e..33f2b68c680b 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -144,8 +144,9 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD; WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); rv370_pcie_gart_tlb_flush(rdev); - DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n", - (unsigned)(rdev->mc.gtt_size >> 20), table_addr); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index c5c2742e4140..1fe98b421c9b 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, /** * Emit the sequence to pacify R300. */ -static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) +static void r300_pacify(drm_radeon_private_t *dev_priv) { uint32_t cache_z, cache_3d, cache_2d; RING_LOCALS; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 720dd99163f8..12470b090ddf 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -993,6 +993,9 @@ int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); r600_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } @@ -2362,19 +2365,33 @@ int r600_copy_blit(struct radeon_device *rdev, mutex_lock(&rdev->r600_blit.mutex); rdev->r600_blit.vb_ib = NULL; - r = r600_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE); + r = r600_blit_prepare_copy(rdev, num_gpu_pages); if (r) { if (rdev->r600_blit.vb_ib) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); mutex_unlock(&rdev->r600_blit.mutex); return r; } - r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE); + r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages); r600_blit_done_copy(rdev, fence); mutex_unlock(&rdev->r600_blit.mutex); return 0; } +void r600_blit_suspend(struct radeon_device *rdev) +{ + int r; + + /* unpin shaders bo */ + if (rdev->r600_blit.shader_obj) { + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (!r) { + radeon_bo_unpin(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + } + } +} + int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ -2494,8 +2511,6 @@ int r600_resume(struct radeon_device *rdev) int r600_suspend(struct radeon_device *rdev) { - int r; - r600_audio_fini(rdev); /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); @@ -2503,14 +2518,8 @@ int r600_suspend(struct radeon_device *rdev) r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); - /* unpin shaders bo */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (!r) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - } + r600_blit_suspend(rdev); + return 0; } @@ -3137,7 +3146,7 @@ int r600_irq_set(struct radeon_device *rdev) return 0; } -static inline void r600_irq_ack(struct radeon_device *rdev) +static void r600_irq_ack(struct radeon_device *rdev) { u32 tmp; @@ -3238,7 +3247,7 @@ void r600_irq_disable(struct radeon_device *rdev) r600_disable_interrupt_state(rdev); } -static inline u32 r600_get_ih_wptr(struct radeon_device *rdev) +static u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c index 7f1043448d25..3c031a48205d 100644 --- a/drivers/gpu/drm/radeon/r600_blit.c +++ b/drivers/gpu/drm/radeon/r600_blit.c @@ -41,7 +41,7 @@ #define COLOR_5_6_5 0x8 #define COLOR_8_8_8_8 0x1a -static inline void +static void set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) { u32 cb_color_info; @@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 ADVANCE_RING(); } -static inline void +static void cp_set_surface_sync(drm_radeon_private_t *dev_priv, u32 sync_type, u32 size, u64 mc_addr) { @@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv, ADVANCE_RING(); } -static inline void +static void set_shaders(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; @@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev) R600_SH_ACTION_ENA, 512, gpu_addr); } -static inline void +static void set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) { uint32_t sq_vtx_constant_word2; @@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) R600_VC_ACTION_ENA, 48, gpu_addr); } -static inline void +static void set_tex_resource(drm_radeon_private_t *dev_priv, int format, int w, int h, int pitch, u64 gpu_addr) { @@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv, } -static inline void +static void set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) { RING_LOCALS; @@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) ADVANCE_RING(); } -static inline void +static void draw_auto(drm_radeon_private_t *dev_priv) { RING_LOCALS; @@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv) COMMIT_RING(); } -static inline void +static void set_default_state(drm_radeon_private_t *dev_priv) { int i; @@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv) ADVANCE_RING(); } -static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction; @@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input) } -static inline int r600_nomm_get_vb(struct drm_device *dev) +static int r600_nomm_get_vb(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; dev_priv->blit_vb = radeon_freelist_get(dev); @@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev) return 0; } -static inline void r600_nomm_put_vb(struct drm_device *dev) +static void r600_nomm_put_vb(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; @@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev) radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); } -static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev) +static void *r600_nomm_get_vb_ptr(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; return (((char *)dev->agp_buffer_map->handle + diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 9aa74c3f8cb6..c4cf1308d4a1 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -42,6 +42,9 @@ #define COLOR_5_6_5 0x8 #define COLOR_8_8_8_8 0x1a +#define RECT_UNIT_H 32 +#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) + /* emits 21 on rv770+, 23 on r600 */ static void set_render_target(struct radeon_device *rdev, int format, @@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format, if (h < 8) h = 8; - cb_color_info = ((format << 2) | (1 << 27) | (1 << 8)); + cb_color_info = CB_FORMAT(format) | + CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) | + CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1); pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1; @@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) { u32 sq_vtx_constant_word2; - sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) | + SQ_VTXC_STRIDE(16); #ifdef __BIG_ENDIAN - sq_vtx_constant_word2 |= (2 << 30); + sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32); #endif radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); @@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev, if (h < 1) h = 1; - sq_tex_resource_word0 = (1 << 0) | (1 << 3); - sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) | - ((w - 1) << 19)); + sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) | + S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); + sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) | + S_038000_TEX_WIDTH(w - 1); - sq_tex_resource_word1 = (format << 26); - sq_tex_resource_word1 |= ((h - 1) << 0); + sq_tex_resource_word1 = S_038004_DATA_FORMAT(format); + sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1); - sq_tex_resource_word4 = ((1 << 14) | - (0 << 16) | - (1 << 19) | - (2 << 22) | - (3 << 25)); + sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) | + S_038010_DST_SEL_X(SQ_SEL_X) | + S_038010_DST_SEL_Y(SQ_SEL_Y) | + S_038010_DST_SEL_Z(SQ_SEL_Z) | + S_038010_DST_SEL_W(SQ_SEL_W); radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); radeon_ring_write(rdev, 0); @@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev) radeon_ring_write(rdev, sq_stack_resource_mgmt_2); } -static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction; @@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev) u32 packet2s[16]; int num_packet2s = 0; + rdev->r600_blit.primitives.set_render_target = set_render_target; + rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync; + rdev->r600_blit.primitives.set_shaders = set_shaders; + rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource; + rdev->r600_blit.primitives.set_tex_resource = set_tex_resource; + rdev->r600_blit.primitives.set_scissors = set_scissors; + rdev->r600_blit.primitives.draw_auto = draw_auto; + rdev->r600_blit.primitives.set_default_state = set_default_state; + + rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */ + rdev->r600_blit.ring_size_common += 5; /* done copy */ + rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */ + + rdev->r600_blit.ring_size_per_loop = 76; + /* set_render_target emits 2 extra dwords on rv6xx */ + if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) + rdev->r600_blit.ring_size_per_loop += 2; + + rdev->r600_blit.max_dim = 8192; + /* pin copy shader into vram if already initialized */ if (rdev->r600_blit.shader_obj) goto done; @@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); } -int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +static unsigned r600_blit_create_rect(unsigned num_gpu_pages, + int *width, int *height, int max_dim) +{ + unsigned max_pages; + unsigned pages = num_gpu_pages; + int w, h; + + if (num_gpu_pages == 0) { + /* not supposed to be called with no pages, but just in case */ + h = 0; + w = 0; + pages = 0; + WARN_ON(1); + } else { + int rect_order = 2; + h = RECT_UNIT_H; + while (num_gpu_pages / rect_order) { + h *= 2; + rect_order *= 4; + if (h >= max_dim) { + h = max_dim; + break; + } + } + max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H); + if (pages > max_pages) + pages = max_pages; + w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; + w = (w / RECT_UNIT_W) * RECT_UNIT_W; + pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); + BUG_ON(pages == 0); + } + + + DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); + + /* return width and height only of the caller wants it */ + if (height) + *height = h; + if (width) + *width = w; + + return pages; +} + + +int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages) { int r; - int ring_size, line_size; - int max_size; - /* loops of emits 64 + fence emit possible */ - int dwords_per_loop = 76, num_loops; + int ring_size; + int num_loops = 0; + int dwords_per_loop = rdev->r600_blit.ring_size_per_loop; r = r600_vb_ib_get(rdev); if (r) return r; - /* set_render_target emits 2 extra dwords on rv6xx */ - if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) - dwords_per_loop += 2; - - /* 8 bpp vs 32 bpp for xfer unit */ - if (size_bytes & 3) - line_size = 8192; - else - line_size = 8192*4; - - max_size = 8192 * line_size; + /* num loops */ + while (num_gpu_pages) { + num_gpu_pages -= + r600_blit_create_rect(num_gpu_pages, NULL, NULL, + rdev->r600_blit.max_dim); + num_loops++; + } - /* major loops cover the max size transfer */ - num_loops = ((size_bytes + max_size) / max_size); - /* minor loops cover the extra non aligned bits */ - num_loops += ((size_bytes % line_size) ? 1 : 0); /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; - /* set default + shaders */ - ring_size += 40; /* shaders + def state */ - ring_size += 10; /* fence emit for VB IB */ - ring_size += 5; /* done copy */ - ring_size += 10; /* fence emit for done copy */ + ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring_size); if (r) return r; - set_default_state(rdev); /* 14 */ - set_shaders(rdev); /* 26 */ + rdev->r600_blit.primitives.set_default_state(rdev); + rdev->r600_blit.primitives.set_shaders(rdev); return 0; } @@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes) + unsigned num_gpu_pages) { - int max_bytes; u64 vb_gpu_addr; u32 *vb; - DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, - size_bytes, rdev->r600_blit.vb_used); + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", + src_gpu_addr, dst_gpu_addr, + num_gpu_pages, rdev->r600_blit.vb_used); vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); - if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { - max_bytes = 8192; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = src_gpu_addr & 255; - int dst_x = dst_gpu_addr & 255; - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - - vb[0] = i2f(dst_x); - vb[1] = 0; - vb[2] = i2f(src_x); - vb[3] = 0; - - vb[4] = i2f(dst_x); - vb[5] = i2f(h); - vb[6] = i2f(src_x); - vb[7] = i2f(h); - - vb[8] = i2f(dst_x + cur_size); - vb[9] = i2f(h); - vb[10] = i2f(src_x + cur_size); - vb[11] = i2f(h); - - /* src 9 */ - set_tex_resource(rdev, FMT_8, - src_x + cur_size, h, src_x + cur_size, - src_gpu_addr); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - /* dst 23 */ - set_render_target(rdev, COLOR_8, - dst_x + cur_size, h, - dst_gpu_addr); + while (num_gpu_pages) { + int w, h; + unsigned size_in_bytes; + unsigned pages_per_loop = + r600_blit_create_rect(num_gpu_pages, &w, &h, + rdev->r600_blit.max_dim); - /* scissors 12 */ - set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; + DRM_DEBUG("rectangle w=%d h=%d\n", w, h); - /* 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); } - } else { - max_bytes = 8192 * 4; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = (src_gpu_addr & 255); - int dst_x = (dst_gpu_addr & 255); - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - vb[0] = i2f(dst_x / 4); - vb[1] = 0; - vb[2] = i2f(src_x / 4); - vb[3] = 0; - - vb[4] = i2f(dst_x / 4); - vb[5] = i2f(h); - vb[6] = i2f(src_x / 4); - vb[7] = i2f(h); - - vb[8] = i2f((dst_x + cur_size) / 4); - vb[9] = i2f(h); - vb[10] = i2f((src_x + cur_size) / 4); - vb[11] = i2f(h); - - /* src 9 */ - set_tex_resource(rdev, FMT_8_8_8_8, - (src_x + cur_size) / 4, - h, (src_x + cur_size) / 4, - src_gpu_addr); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - - /* dst 23 */ - set_render_target(rdev, COLOR_8_8_8_8, - (dst_x + cur_size) / 4, h, - dst_gpu_addr); - - /* scissors 12 */ - set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); - - /* Vertex buffer setup 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - /* 78 ring dwords per loop */ - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } + vb[0] = 0; + vb[1] = 0; + vb[2] = 0; + vb[3] = 0; + + vb[4] = 0; + vb[5] = i2f(h); + vb[6] = 0; + vb[7] = i2f(h); + + vb[8] = i2f(w); + vb[9] = i2f(h); + vb[10] = i2f(w); + vb[11] = i2f(h); + + rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8, + w, h, w, src_gpu_addr); + rdev->r600_blit.primitives.cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, + size_in_bytes, src_gpu_addr); + rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8, + w, h, dst_gpu_addr); + rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h); + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr); + rdev->r600_blit.primitives.draw_auto(rdev); + rdev->r600_blit.primitives.cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + size_in_bytes, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 4*12; + src_gpu_addr += size_in_bytes; + dst_gpu_addr += size_in_bytes; + num_gpu_pages -= pages_per_loop; } } - diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index cf83aa05a684..0a2e023c1557 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = { [V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR}, }; -static inline bool fmt_is_valid_color(u32 format) +static bool fmt_is_valid_color(u32 format) { if (format >= ARRAY_SIZE(color_formats_table)) return false; @@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format) return false; } -static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family) +static bool fmt_is_valid_texture(u32 format, enum radeon_family family) { if (format >= ARRAY_SIZE(color_formats_table)) return false; @@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family) return false; } -static inline int fmt_get_blocksize(u32 format) +static int fmt_get_blocksize(u32 format) { if (format >= ARRAY_SIZE(color_formats_table)) return 0; @@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format) return color_formats_table[format].blocksize; } -static inline int fmt_get_nblocksx(u32 format, u32 w) +static int fmt_get_nblocksx(u32 format, u32 w) { unsigned bw; @@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w) return (w + bw - 1) / bw; } -static inline int fmt_get_nblocksy(u32 format, u32 h) +static int fmt_get_nblocksy(u32 format, u32 h) { unsigned bh; @@ -223,25 +223,6 @@ static inline int fmt_get_nblocksy(u32 format, u32 h) return (h + bh - 1) / bh; } -static inline int r600_bpe_from_format(u32 *bpe, u32 format) -{ - unsigned res; - - if (format >= ARRAY_SIZE(color_formats_table)) - goto fail; - - res = color_formats_table[format].blocksize; - if (res == 0) - goto fail; - - *bpe = res; - return 0; - -fail: - *bpe = 16; - return -EINVAL; -} - struct array_mode_checker { int array_mode; u32 group_size; @@ -252,7 +233,7 @@ struct array_mode_checker { }; /* returns alignment in pixels for pitch/height/depth and bytes for base */ -static inline int r600_get_array_mode_alignment(struct array_mode_checker *values, +static int r600_get_array_mode_alignment(struct array_mode_checker *values, u32 *pitch_align, u32 *height_align, u32 *depth_align, @@ -331,7 +312,7 @@ static void r600_cs_track_init(struct r600_cs_track *track) track->db_depth_control = 0xFFFFFFFF; } -static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) +static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) { struct r600_cs_track *track = p->track; u32 slice_tile_max, size, tmp; @@ -737,7 +718,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, * Check next packet is relocation packet3, do bo validation and compute * GPU offset using the provided start. **/ -static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) +static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) { struct radeon_cs_packet p3reloc; int r; @@ -911,7 +892,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p, * if register is safe. If register is not flag as safe this function * will test it against a list of register needind special handling. */ -static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { struct r600_cs_track *track = (struct r600_cs_track *)p->track; struct radeon_cs_reloc *reloc; @@ -1215,7 +1196,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx return 0; } -static inline unsigned mip_minify(unsigned size, unsigned level) +static unsigned mip_minify(unsigned size, unsigned level) { unsigned val; @@ -1285,7 +1266,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, * This function will check that the resource has valid field and that * the texture and mipmap bo object are big enough to cover this resource. */ -static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, +static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, struct radeon_bo *texture, struct radeon_bo *mipmap, u64 base_offset, diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 0245ae6c204e..bfe1b5d92afe 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -79,6 +79,11 @@ #define CB_COLOR0_SIZE 0x28060 #define CB_COLOR0_VIEW 0x28080 #define CB_COLOR0_INFO 0x280a0 +# define CB_FORMAT(x) ((x) << 2) +# define CB_ARRAY_MODE(x) ((x) << 8) +# define CB_SOURCE_FORMAT(x) ((x) << 27) +# define CB_SF_EXPORT_FULL 0 +# define CB_SF_EXPORT_NORM 1 #define CB_COLOR0_TILE 0x280c0 #define CB_COLOR0_FRAG 0x280e0 #define CB_COLOR0_MASK 0x28100 @@ -417,6 +422,17 @@ #define SQ_PGM_START_VS 0x28858 #define SQ_PGM_RESOURCES_VS 0x28868 #define SQ_PGM_CF_OFFSET_VS 0x288d0 + +#define SQ_VTX_CONSTANT_WORD0_0 0x30000 +#define SQ_VTX_CONSTANT_WORD1_0 0x30004 +#define SQ_VTX_CONSTANT_WORD2_0 0x30008 +# define SQ_VTXC_BASE_ADDR_HI(x) ((x) << 0) +# define SQ_VTXC_STRIDE(x) ((x) << 8) +# define SQ_VTXC_ENDIAN_SWAP(x) ((x) << 30) +# define SQ_ENDIAN_NONE 0 +# define SQ_ENDIAN_8IN16 1 +# define SQ_ENDIAN_8IN32 2 +#define SQ_VTX_CONSTANT_WORD3_0 0x3000c #define SQ_VTX_CONSTANT_WORD6_0 0x38018 #define S__SQ_VTX_CONSTANT_TYPE(x) (((x) & 3) << 30) #define G__SQ_VTX_CONSTANT_TYPE(x) (((x) >> 30) & 3) @@ -1352,6 +1368,12 @@ #define S_038010_DST_SEL_W(x) (((x) & 0x7) << 25) #define G_038010_DST_SEL_W(x) (((x) >> 25) & 0x7) #define C_038010_DST_SEL_W 0xF1FFFFFF +# define SQ_SEL_X 0 +# define SQ_SEL_Y 1 +# define SQ_SEL_Z 2 +# define SQ_SEL_W 3 +# define SQ_SEL_0 4 +# define SQ_SEL_1 5 #define S_038010_BASE_LEVEL(x) (((x) & 0xF) << 28) #define G_038010_BASE_LEVEL(x) (((x) >> 28) & 0xF) #define C_038010_BASE_LEVEL 0x0FFFFFFF diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c1e056b35b29..e3170c794c1d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -102,7 +102,7 @@ extern int radeon_pcie_gen2; #define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) /* RADEON_IB_POOL_SIZE must be a power of 2 */ #define RADEON_IB_POOL_SIZE 16 -#define RADEON_DEBUGFS_MAX_NUM_FILES 32 +#define RADEON_DEBUGFS_MAX_COMPONENTS 32 #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 @@ -523,9 +523,30 @@ struct r600_ih { bool enabled; }; +struct r600_blit_cp_primitives { + void (*set_render_target)(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr); + void (*cp_set_surface_sync)(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr); + void (*set_shaders)(struct radeon_device *rdev); + void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr); + void (*set_tex_resource)(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr); + void (*set_scissors)(struct radeon_device *rdev, int x1, int y1, + int x2, int y2); + void (*draw_auto)(struct radeon_device *rdev); + void (*set_default_state)(struct radeon_device *rdev); +}; + struct r600_blit { struct mutex mutex; struct radeon_bo *shader_obj; + struct r600_blit_cp_primitives primitives; + int max_dim; + int ring_size_common; + int ring_size_per_loop; u64 shader_gpu_addr; u32 vs_offset, ps_offset; u32 state_offset; @@ -534,6 +555,8 @@ struct r600_blit { struct radeon_ib *vb_ib; }; +void r600_blit_suspend(struct radeon_device *rdev); + int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); @@ -601,32 +624,7 @@ struct radeon_cs_parser { extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); extern int radeon_cs_finish_pages(struct radeon_cs_parser *p); - - -static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) -{ - struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; - u32 pg_idx, pg_offset; - u32 idx_value = 0; - int new_page; - - pg_idx = (idx * 4) / PAGE_SIZE; - pg_offset = (idx * 4) % PAGE_SIZE; - - if (ibc->kpage_idx[0] == pg_idx) - return ibc->kpage[0][pg_offset/4]; - if (ibc->kpage_idx[1] == pg_idx) - return ibc->kpage[1][pg_offset/4]; - - new_page = radeon_cs_update_pages(p, pg_idx); - if (new_page < 0) { - p->parser_error = new_page; - return 0; - } - - idx_value = ibc->kpage[new_page][pg_offset/4]; - return idx_value; -} +extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx); struct radeon_cs_packet { unsigned idx; @@ -869,7 +867,7 @@ struct radeon_pm { /* * Benchmarking */ -void radeon_benchmark(struct radeon_device *rdev); +void radeon_benchmark(struct radeon_device *rdev, int test_number); /* @@ -1252,45 +1250,10 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); -static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) -{ - if (reg < rdev->rmmio_size) - return readl((rdev->rmmio) + reg); - else { - writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); - return readl((rdev->rmmio) + RADEON_MM_DATA); - } -} - -static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) -{ - if (reg < rdev->rmmio_size) - writel(v, (rdev->rmmio) + reg); - else { - writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); - writel(v, (rdev->rmmio) + RADEON_MM_DATA); - } -} - -static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) -{ - if (reg < rdev->rio_mem_size) - return ioread32(rdev->rio_mem + reg); - else { - iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); - return ioread32(rdev->rio_mem + RADEON_MM_DATA); - } -} - -static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - if (reg < rdev->rio_mem_size) - iowrite32(v, rdev->rio_mem + reg); - else { - iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); - iowrite32(v, rdev->rio_mem + RADEON_MM_DATA); - } -} +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg); +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); +void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); /* * Cast helper @@ -1413,19 +1376,19 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ + +#if DRM_DEBUG_CODE == 0 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) { -#if DRM_DEBUG_CODE - if (rdev->cp.count_dw <= 0) { - DRM_ERROR("radeon: writting more dword to ring than expected !\n"); - } -#endif rdev->cp.ring[rdev->cp.wptr++] = v; rdev->cp.wptr &= rdev->cp.ptr_mask; rdev->cp.count_dw--; rdev->cp.ring_free_dw--; } - +#else +/* With debugging this is just too big to inline */ +void radeon_ring_write(struct radeon_device *rdev, uint32_t v); +#endif /* * ASICs macro. diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index df8218bb83a6..e2944566ffea 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -765,9 +765,9 @@ static struct radeon_asic evergreen_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = &evergreen_copy_blit, + .copy_blit = &r600_copy_blit, .copy_dma = NULL, - .copy = &evergreen_copy_blit, + .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, @@ -812,9 +812,9 @@ static struct radeon_asic sumo_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = &evergreen_copy_blit, + .copy_blit = &r600_copy_blit, .copy_dma = NULL, - .copy = &evergreen_copy_blit, + .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = NULL, @@ -859,9 +859,9 @@ static struct radeon_asic btc_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = &evergreen_copy_blit, + .copy_blit = &r600_copy_blit, .copy_dma = NULL, - .copy = &evergreen_copy_blit, + .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, @@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = &evergreen_copy_blit, + .copy_blit = &r600_copy_blit, .copy_dma = NULL, - .copy = &evergreen_copy_blit, + .copy = &r600_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3dedaa07aac1..85f14f0337e4 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder); int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ -int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages); void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes); + unsigned num_gpu_pages); /* * rv770,rv730,rv710,rv740 @@ -401,9 +401,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev); int evergreen_asic_reset(struct radeon_device *rdev); void evergreen_bandwidth_update(struct radeon_device *rdev); void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); -int evergreen_copy_blit(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence *fence); void evergreen_hpd_init(struct radeon_device *rdev); void evergreen_hpd_fini(struct radeon_device *rdev); bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -421,13 +418,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc); void evergreen_disable_interrupt_state(struct radeon_device *rdev); int evergreen_blit_init(struct radeon_device *rdev); -void evergreen_blit_fini(struct radeon_device *rdev); -/* evergreen blit */ -int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); -void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); -void evergreen_kms_blit_copy(struct radeon_device *rdev, - u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes); /* * cayman diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bf2b61584cdb..08d0b94332e6 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -62,7 +62,7 @@ union atom_supported_devices { struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1; }; -static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev, +static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev, uint8_t id) { struct atom_context *ctx = rdev->mode_info.atom_context; @@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) } } -static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, +static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, u8 id) { struct atom_context *ctx = rdev->mode_info.atom_context; diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 10191d9372d8..5cafc90de7f8 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -26,21 +26,81 @@ #include "radeon_reg.h" #include "radeon.h" -void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, - unsigned sdomain, unsigned ddomain) +#define RADEON_BENCHMARK_COPY_BLIT 1 +#define RADEON_BENCHMARK_COPY_DMA 0 + +#define RADEON_BENCHMARK_ITERATIONS 1024 +#define RADEON_BENCHMARK_COMMON_MODES_N 17 + +static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, + uint64_t saddr, uint64_t daddr, + int flag, int n) +{ + unsigned long start_jiffies; + unsigned long end_jiffies; + struct radeon_fence *fence = NULL; + int i, r; + + start_jiffies = jiffies; + for (i = 0; i < n; i++) { + r = radeon_fence_create(rdev, &fence); + if (r) + return r; + + switch (flag) { + case RADEON_BENCHMARK_COPY_DMA: + r = radeon_copy_dma(rdev, saddr, daddr, + size / RADEON_GPU_PAGE_SIZE, + fence); + break; + case RADEON_BENCHMARK_COPY_BLIT: + r = radeon_copy_blit(rdev, saddr, daddr, + size / RADEON_GPU_PAGE_SIZE, + fence); + break; + default: + DRM_ERROR("Unknown copy method\n"); + r = -EINVAL; + } + if (r) + goto exit_do_move; + r = radeon_fence_wait(fence, false); + if (r) + goto exit_do_move; + radeon_fence_unref(&fence); + } + end_jiffies = jiffies; + r = jiffies_to_msecs(end_jiffies - start_jiffies); + +exit_do_move: + if (fence) + radeon_fence_unref(&fence); + return r; +} + + +static void radeon_benchmark_log_results(int n, unsigned size, + unsigned int time, + unsigned sdomain, unsigned ddomain, + char *kind) +{ + unsigned int throughput = (n * (size >> 10)) / time; + DRM_INFO("radeon: %s %u bo moves of %u kB from" + " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n", + kind, n, size >> 10, sdomain, ddomain, time, + throughput * 8, throughput); +} + +static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, + unsigned sdomain, unsigned ddomain) { struct radeon_bo *dobj = NULL; struct radeon_bo *sobj = NULL; - struct radeon_fence *fence = NULL; uint64_t saddr, daddr; - unsigned long start_jiffies; - unsigned long end_jiffies; - unsigned long time; - unsigned i, n, size; - int r; + int r, n; + unsigned int time; - size = bsize; - n = 1024; + n = RADEON_BENCHMARK_ITERATIONS; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj); if (r) { goto out_cleanup; @@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, } /* r100 doesn't have dma engine so skip the test */ - if (rdev->asic->copy_dma) { - - start_jiffies = jiffies; - for (i = 0; i < n; i++) { - r = radeon_fence_create(rdev, &fence); - if (r) { - goto out_cleanup; - } - - r = radeon_copy_dma(rdev, saddr, daddr, - size / RADEON_GPU_PAGE_SIZE, fence); - - if (r) { - goto out_cleanup; - } - r = radeon_fence_wait(fence, false); - if (r) { - goto out_cleanup; - } - radeon_fence_unref(&fence); - } - end_jiffies = jiffies; - time = end_jiffies - start_jiffies; - time = jiffies_to_msecs(time); - if (time > 0) { - i = ((n * size) >> 10) / time; - printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" - " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", - n, size >> 10, - sdomain, ddomain, time, - i, i * 1000, (i * 1000) / 1024); - } - } - - start_jiffies = jiffies; - for (i = 0; i < n; i++) { - r = radeon_fence_create(rdev, &fence); - if (r) { - goto out_cleanup; - } - r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); - if (r) { - goto out_cleanup; - } - r = radeon_fence_wait(fence, false); - if (r) { + /* also, VRAM-to-VRAM test doesn't make much sense for DMA */ + /* skip it as well if domains are the same */ + if ((rdev->asic->copy_dma) && (sdomain != ddomain)) { + time = radeon_benchmark_do_move(rdev, size, saddr, daddr, + RADEON_BENCHMARK_COPY_DMA, n); + if (time < 0) goto out_cleanup; - } - radeon_fence_unref(&fence); - } - end_jiffies = jiffies; - time = end_jiffies - start_jiffies; - time = jiffies_to_msecs(time); - if (time > 0) { - i = ((n * size) >> 10) / time; - printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d" - " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, - sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); + if (time > 0) + radeon_benchmark_log_results(n, size, time, + sdomain, ddomain, "dma"); } + + time = radeon_benchmark_do_move(rdev, size, saddr, daddr, + RADEON_BENCHMARK_COPY_BLIT, n); + if (time < 0) + goto out_cleanup; + if (time > 0) + radeon_benchmark_log_results(n, size, time, + sdomain, ddomain, "blit"); + out_cleanup: if (sobj) { r = radeon_bo_reserve(sobj, false); @@ -143,18 +164,92 @@ out_cleanup: } radeon_bo_unref(&dobj); } - if (fence) { - radeon_fence_unref(&fence); - } + if (r) { - printk(KERN_WARNING "Error while benchmarking BO move.\n"); + DRM_ERROR("Error while benchmarking BO move.\n"); } } -void radeon_benchmark(struct radeon_device *rdev) +void radeon_benchmark(struct radeon_device *rdev, int test_number) { - radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, - RADEON_GEM_DOMAIN_VRAM); - radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, - RADEON_GEM_DOMAIN_GTT); + int i; + int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = { + 640 * 480 * 4, + 720 * 480 * 4, + 800 * 600 * 4, + 848 * 480 * 4, + 1024 * 768 * 4, + 1152 * 768 * 4, + 1280 * 720 * 4, + 1280 * 800 * 4, + 1280 * 854 * 4, + 1280 * 960 * 4, + 1280 * 1024 * 4, + 1440 * 900 * 4, + 1400 * 1050 * 4, + 1680 * 1050 * 4, + 1600 * 1200 * 4, + 1920 * 1080 * 4, + 1920 * 1200 * 4 + }; + + switch (test_number) { + case 1: + /* simple test, VRAM to GTT and GTT to VRAM */ + radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM); + radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_GTT); + break; + case 2: + /* simple test, VRAM to VRAM */ + radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_VRAM); + break; + case 3: + /* GTT to VRAM, buffer size sweep, powers of 2 */ + for (i = 1; i <= 65536; i <<= 1) + radeon_benchmark_move(rdev, i*1024, + RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM); + break; + case 4: + /* VRAM to GTT, buffer size sweep, powers of 2 */ + for (i = 1; i <= 65536; i <<= 1) + radeon_benchmark_move(rdev, i*1024, + RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_GTT); + break; + case 5: + /* VRAM to VRAM, buffer size sweep, powers of 2 */ + for (i = 1; i <= 65536; i <<= 1) + radeon_benchmark_move(rdev, i*1024, + RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_VRAM); + break; + case 6: + /* GTT to VRAM, buffer size sweep, common modes */ + for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + radeon_benchmark_move(rdev, common_modes[i], + RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM); + break; + case 7: + /* VRAM to GTT, buffer size sweep, common modes */ + for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + radeon_benchmark_move(rdev, common_modes[i], + RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_GTT); + break; + case 8: + /* VRAM to VRAM, buffer size sweep, common modes */ + for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) + radeon_benchmark_move(rdev, common_modes[i], + RADEON_GEM_DOMAIN_VRAM, + RADEON_GEM_DOMAIN_VRAM); + break; + + default: + DRM_ERROR("Unknown benchmark\n"); + } } diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 63675241c7ff..8bf83c4b4147 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -620,8 +620,8 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde i2c.y_data_mask = 0x80; } else { /* default masks for ddc pads */ - i2c.mask_clk_mask = RADEON_GPIO_EN_1; - i2c.mask_data_mask = RADEON_GPIO_EN_0; + i2c.mask_clk_mask = RADEON_GPIO_MASK_1; + i2c.mask_data_mask = RADEON_GPIO_MASK_0; i2c.a_clk_mask = RADEON_GPIO_A_1; i2c.a_data_mask = RADEON_GPIO_A_0; i2c.en_clk_mask = RADEON_GPIO_EN_1; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 449c3d8c6836..dec6cbe6a0a6 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -724,6 +724,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force) dret = radeon_ddc_probe(radeon_connector, radeon_connector->requires_extended_probe); if (dret) { + radeon_connector->detected_by_load = false; if (radeon_connector->edid) { kfree(radeon_connector->edid); radeon_connector->edid = NULL; @@ -750,12 +751,21 @@ radeon_vga_detect(struct drm_connector *connector, bool force) } else { /* if we aren't forcing don't do destructive polling */ - if (!force) - return connector->status; + if (!force) { + /* only return the previous status if we last + * detected a monitor via load. + */ + if (radeon_connector->detected_by_load) + return connector->status; + else + return ret; + } if (radeon_connector->dac_load_detect && encoder) { encoder_funcs = encoder->helper_private; ret = encoder_funcs->detect(encoder, connector); + if (ret == connector_status_connected) + radeon_connector->detected_by_load = true; } } @@ -897,6 +907,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) dret = radeon_ddc_probe(radeon_connector, radeon_connector->requires_extended_probe); if (dret) { + radeon_connector->detected_by_load = false; if (radeon_connector->edid) { kfree(radeon_connector->edid); radeon_connector->edid = NULL; @@ -959,8 +970,18 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) if ((ret == connector_status_connected) && (radeon_connector->use_digital == true)) goto out; + /* DVI-D and HDMI-A are digital only */ + if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) || + (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA)) + goto out; + + /* if we aren't forcing don't do destructive polling */ if (!force) { - ret = connector->status; + /* only return the previous status if we last + * detected a monitor via load. + */ + if (radeon_connector->detected_by_load) + ret = connector->status; goto out; } @@ -984,6 +1005,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) ret = encoder_funcs->detect(encoder, connector); if (ret == connector_status_connected) { radeon_connector->use_digital = false; + radeon_connector->detected_by_load = true; } } break; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b51e15725c6e..c33bc914d93d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -750,14 +750,15 @@ int radeon_device_init(struct radeon_device *rdev, /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. - * IGP - can handle 40-bits (in theory) + * IGP - can handle 40-bits * AGP - generally dma32 is safest - * PCI - only dma32 + * PCI - dma32 for legacy pci gart, 40 bits on newer asics */ rdev->need_dma32 = false; if (rdev->flags & RADEON_IS_AGP) rdev->need_dma32 = true; - if (rdev->flags & RADEON_IS_PCI) + if ((rdev->flags & RADEON_IS_PCI) && + (rdev->family < CHIP_RS400)) rdev->need_dma32 = true; dma_bits = rdev->need_dma32 ? 32 : 40; @@ -817,7 +818,7 @@ int radeon_device_init(struct radeon_device *rdev, radeon_test_moves(rdev); } if (radeon_benchmarking) { - radeon_benchmark(rdev); + radeon_benchmark(rdev, radeon_benchmarking); } return 0; } @@ -981,7 +982,7 @@ struct radeon_debugfs { struct drm_info_list *files; unsigned num_files; }; -static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES]; +static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; static unsigned _radeon_debugfs_count = 0; int radeon_debugfs_add_files(struct radeon_device *rdev, @@ -996,14 +997,17 @@ int radeon_debugfs_add_files(struct radeon_device *rdev, return 0; } } - if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) { - DRM_ERROR("Reached maximum number of debugfs files.\n"); - DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n"); + + i = _radeon_debugfs_count + 1; + if (i > RADEON_DEBUGFS_MAX_COMPONENTS) { + DRM_ERROR("Reached maximum number of debugfs components.\n"); + DRM_ERROR("Report so we increase " + "RADEON_DEBUGFS_MAX_COMPONENTS.\n"); return -EINVAL; } _radeon_debugfs[_radeon_debugfs_count].files = files; _radeon_debugfs[_radeon_debugfs_count].num_files = nfiles; - _radeon_debugfs_count++; + _radeon_debugfs_count = i; #if defined(CONFIG_DEBUG_FS) drm_debugfs_create_files(files, nfiles, rdev->ddev->control->debugfs_root, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 3475a09f946b..76ec0e9ed8ae 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -263,7 +263,7 @@ retry: */ if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) { /* good news we believe it's a lockup */ - WARN(1, "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", + printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", fence->seq, seq); /* FIXME: what should we do ? marking everyone * as signaled for now diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a533f52fd163..fdc3a9a54bf8 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -142,7 +142,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, u64 page_base; if (!rdev->gart.ready) { - WARN(1, "trying to unbind memory to unitialized GART !\n"); + WARN(1, "trying to unbind memory from uninitialized GART !\n"); return; } t = offset / RADEON_GPU_PAGE_SIZE; @@ -174,7 +174,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int i, j; if (!rdev->gart.ready) { - WARN(1, "trying to bind memory to unitialized GART !\n"); + WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; } t = offset / RADEON_GPU_PAGE_SIZE; diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 6c111c1fa3f9..02cb7da4124d 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -81,8 +81,9 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_e /* bit banging i2c */ -static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state) +static int pre_xfer(struct i2c_adapter *i2c_adap) { + struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap); struct radeon_device *rdev = i2c->dev->dev_private; struct radeon_i2c_bus_rec *rec = &i2c->rec; uint32_t temp; @@ -137,19 +138,30 @@ static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state) WREG32(rec->en_data_reg, temp); /* mask the gpio pins for software use */ - temp = RREG32(rec->mask_clk_reg); - if (lock_state) - temp |= rec->mask_clk_mask; - else - temp &= ~rec->mask_clk_mask; + temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask; WREG32(rec->mask_clk_reg, temp); temp = RREG32(rec->mask_clk_reg); + temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask; + WREG32(rec->mask_data_reg, temp); temp = RREG32(rec->mask_data_reg); - if (lock_state) - temp |= rec->mask_data_mask; - else - temp &= ~rec->mask_data_mask; + + return 0; +} + +static void post_xfer(struct i2c_adapter *i2c_adap) +{ + struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap); + struct radeon_device *rdev = i2c->dev->dev_private; + struct radeon_i2c_bus_rec *rec = &i2c->rec; + uint32_t temp; + + /* unmask the gpio pins for software use */ + temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask; + WREG32(rec->mask_clk_reg, temp); + temp = RREG32(rec->mask_clk_reg); + + temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask; WREG32(rec->mask_data_reg, temp); temp = RREG32(rec->mask_data_reg); } @@ -209,22 +221,6 @@ static void set_data(void *i2c_priv, int data) WREG32(rec->en_data_reg, val); } -static int pre_xfer(struct i2c_adapter *i2c_adap) -{ - struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap); - - radeon_i2c_do_lock(i2c, 1); - - return 0; -} - -static void post_xfer(struct i2c_adapter *i2c_adap) -{ - struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap); - - radeon_i2c_do_lock(i2c, 0); -} - /* hw i2c */ static u32 radeon_get_i2c_prescale(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c index 465746bd51b7..00da38424dfc 100644 --- a/drivers/gpu/drm/radeon/radeon_irq.c +++ b/drivers/gpu/drm/radeon/radeon_irq.c @@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc) } } -static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int) +static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int) { u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS); u32 irq_mask = RADEON_SW_INT_TEST; diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c index c7b6cb428d09..b37ec0f1413a 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c @@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder, *v_sync_strt_wid = tmp; } -static inline int get_post_div(int value) +static int get_post_div(int value) { int post_div; switch (value) { diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 68820f5f6303..ed0178f03235 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -447,6 +447,7 @@ struct radeon_connector { struct edid *edid; void *con_priv; bool dac_load_detect; + bool detected_by_load; /* if the connection status was determined by load */ uint16_t connector_object_id; struct radeon_hpd hpd; struct radeon_router router; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 976c3b1b1b6e..1c851521f458 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) } return 0; } + +int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); + if (unlikely(r != 0)) + return r; + spin_lock(&bo->tbo.bdev->fence_lock); + if (mem_type) + *mem_type = bo->tbo.mem.mem_type; + if (bo->tbo.sync_obj) + r = ttm_bo_wait(&bo->tbo, true, true, no_wait); + spin_unlock(&bo->tbo.bdev->fence_lock); + ttm_bo_unreserve(&bo->tbo); + return r; +} + + +/** + * radeon_bo_reserve - reserve bo + * @bo: bo structure + * @no_wait: don't sleep while trying to reserve (return -EBUSY) + * + * Returns: + * -EBUSY: buffer is busy and @no_wait is true + * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by + * a signal. Release all buffer reservations and return to user-space. + */ +int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + dev_err(bo->rdev->dev, "%p reserve failed\n", bo); + return r; + } + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index ede6c13628f2..b07f0f9b8627 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type) return 0; } -/** - * radeon_bo_reserve - reserve bo - * @bo: bo structure - * @no_wait: don't sleep while trying to reserve (return -EBUSY) - * - * Returns: - * -EBUSY: buffer is busy and @no_wait is true - * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by - * a signal. Release all buffer reservations and return to user-space. - */ -static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait) -{ - int r; - - r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - dev_err(bo->rdev->dev, "%p reserve failed\n", bo); - return r; - } - return 0; -} +int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait); static inline void radeon_bo_unreserve(struct radeon_bo *bo) { @@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo) return bo->tbo.addr_space_offset; } -static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, - bool no_wait) -{ - int r; - - r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); - if (unlikely(r != 0)) - return r; - spin_lock(&bo->tbo.bdev->fence_lock); - if (mem_type) - *mem_type = bo->tbo.mem.mem_type; - if (bo->tbo.sync_obj) - r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); - ttm_bo_unreserve(&bo->tbo); - return r; -} +extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, + bool no_wait); extern int radeon_bo_create(struct radeon_device *rdev, unsigned long size, int byte_align, diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 08c0233db1b8..49d58202202c 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -35,6 +35,44 @@ int radeon_debugfs_ib_init(struct radeon_device *rdev); +u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) +{ + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + u32 pg_idx, pg_offset; + u32 idx_value = 0; + int new_page; + + pg_idx = (idx * 4) / PAGE_SIZE; + pg_offset = (idx * 4) % PAGE_SIZE; + + if (ibc->kpage_idx[0] == pg_idx) + return ibc->kpage[0][pg_offset/4]; + if (ibc->kpage_idx[1] == pg_idx) + return ibc->kpage[1][pg_offset/4]; + + new_page = radeon_cs_update_pages(p, pg_idx); + if (new_page < 0) { + p->parser_error = new_page; + return 0; + } + + idx_value = ibc->kpage[new_page][pg_offset/4]; + return idx_value; +} + +void radeon_ring_write(struct radeon_device *rdev, uint32_t v) +{ +#if DRM_DEBUG_CODE + if (rdev->cp.count_dw <= 0) { + DRM_ERROR("radeon: writting more dword to ring than expected !\n"); + } +#endif + rdev->cp.ring[rdev->cp.wptr++] = v; + rdev->cp.wptr &= rdev->cp.ptr_mask; + rdev->cp.count_dw--; + rdev->cp.ring_free_dw--; +} + void radeon_ib_bogus_cleanup(struct radeon_device *rdev) { struct radeon_ib *ib, *n; diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 92e7ea73b7c5..e8422ae7fe74 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t * return 0; } -static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t * - dev_priv, - struct drm_file *file_priv, - drm_radeon_kcmd_buffer_t * - cmdbuf, - unsigned int *cmdsz) +static int radeon_check_and_fixup_packet3(drm_radeon_private_t * + dev_priv, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t * + cmdbuf, + unsigned int *cmdsz) { u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); u32 offset, narrays; @@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t * * CP hardware state programming functions */ -static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv, - struct drm_clip_rect * box) +static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv, + struct drm_clip_rect * box) { RING_LOCALS; diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index aa6a66eeb4ec..89a6e1ecea8d 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -182,6 +182,9 @@ int rs400_gart_enable(struct radeon_device *rdev) /* Enable gart */ WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg)); rs400_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 4b5d0e6974a8..9320dd6404f6 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -484,6 +484,9 @@ static int rs600_gart_enable(struct radeon_device *rdev) tmp = RREG32_MC(R_000009_MC_CNTL1); WREG32_MC(R_000009_MC_CNTL1, (tmp | S_000009_ENABLE_PAGE_TABLES(1))); rs600_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index b13c2eedc321..87cc1feee3ac 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -161,6 +161,9 @@ int rv770_pcie_gart_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); r600_pcie_gart_tlb_flush(rdev); + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(rdev->mc.gtt_size >> 20), + (unsigned long long)rdev->gart.table_addr); rdev->gart.ready = true; return 0; } @@ -1184,8 +1187,6 @@ int rv770_resume(struct radeon_device *rdev) int rv770_suspend(struct radeon_device *rdev) { - int r; - r600_audio_fini(rdev); /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); @@ -1193,14 +1194,8 @@ int rv770_suspend(struct radeon_device *rdev) r600_irq_suspend(rdev); radeon_wb_disable(rdev); rv770_pcie_gart_disable(rdev); - /* unpin shaders bo */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (likely(r == 0)) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - } + r600_blit_suspend(rdev); + return 0; } |