summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c46
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c4
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c308
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c772
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h149
-rw-r--r--drivers/gpu/drm/radeon/ni.c467
-rw-r--r--drivers/gpu/drm/radeon/nid.h87
-rw-r--r--drivers/gpu/drm/radeon/r100.c23
-rw-r--r--drivers/gpu/drm/radeon/r600.c557
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c399
-rw-r--r--drivers/gpu/drm/radeon/r600_reg.h9
-rw-r--r--drivers/gpu/drm/radeon/r600d.h86
-rw-r--r--drivers/gpu/drm/radeon/radeon.h46
-rw-r--r--drivers/gpu/drm/radeon/radeon_agp.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c198
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h38
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c57
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c72
-rw-r--r--drivers/gpu/drm/radeon/radeon_cp.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c46
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c51
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c16
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h5
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c46
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c28
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c37
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c31
-rw-r--r--drivers/gpu/drm/radeon/rv515.c122
-rw-r--r--drivers/gpu/drm/radeon/rv770.c105
-rw-r--r--drivers/gpu/drm/radeon/rv770d.h71
-rw-r--r--drivers/gpu/drm/radeon/si.c440
-rw-r--r--drivers/gpu/drm/radeon/sid.h138
41 files changed, 4090 insertions, 468 deletions
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 2e566e123e9e..9175615bbd8a 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -561,6 +561,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
/* use frac fb div on APUs */
if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
+ if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
+ radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
} else {
radeon_crtc->pll_flags |= RADEON_PLL_LEGACY;
@@ -1697,34 +1699,22 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
DRM_ERROR("unable to allocate a PPLL\n");
return ATOM_PPLL_INVALID;
} else {
- if (ASIC_IS_AVIVO(rdev)) {
- /* in DP mode, the DP ref clock can come from either PPLL
- * depending on the asic:
- * DCE3: PPLL1 or PPLL2
- */
- if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) {
- /* use the same PPLL for all DP monitors */
- pll = radeon_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = radeon_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- /* all other cases */
- pll_in_use = radeon_get_pll_use_mask(crtc);
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- /* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
- return radeon_crtc->crtc_id;
- }
+ /* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
+ /* some atombios (observed in some DCE2/DCE3) code have a bug,
+ * the matching btw pll and crtc is done through
+ * PCLK_CRTC[1|2]_CNTL (0x480/0x484) but atombios code use the
+ * pll (1 or 2) to select which register to write. ie if using
+ * pll1 it will use PCLK_CRTC1_CNTL (0x480) and if using pll2
+ * it will use PCLK_CRTC2_CNTL (0x484), it then use crtc id to
+ * choose which value to write. Which is reverse order from
+ * register logic. So only case that works is when pllid is
+ * same as crtcid or when both pll and crtc are enabled and
+ * both use same clock.
+ *
+ * So just return crtc id as if crtc and pll were hard linked
+ * together even if they aren't
+ */
+ return radeon_crtc->crtc_id;
}
}
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index ba498f8e47a2..4552d4aff317 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -340,7 +340,7 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
(radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) {
struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
- radeon_dp_set_link_config(connector, mode);
+ radeon_dp_set_link_config(connector, adjusted_mode);
}
return true;
@@ -1625,7 +1625,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
/* some early dce3.2 boards have a bug in their transmitter control table */
- if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
+ if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
}
if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 14313ad43b76..061fa0a28900 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1330,6 +1330,8 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
break;
udelay(1);
}
+ } else {
+ save->crtc_enabled[i] = false;
}
}
@@ -1372,7 +1374,7 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s
WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
for (i = 0; i < rdev->num_crtc; i++) {
- if (save->crtc_enabled) {
+ if (save->crtc_enabled[i]) {
if (ASIC_IS_DCE6(rdev)) {
tmp = RREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i]);
tmp |= EVERGREEN_CRTC_BLANK_DATA_EN;
@@ -1648,7 +1650,7 @@ static int evergreen_cp_resume(struct radeon_device *rdev)
ring->wptr = 0;
WREG32(CP_RB_WPTR, ring->wptr);
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
WREG32(CP_RB_RPTR_ADDR,
((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC));
WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
@@ -1819,7 +1821,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
case CHIP_SUMO:
rdev->config.evergreen.num_ses = 1;
rdev->config.evergreen.max_pipes = 4;
- rdev->config.evergreen.max_tile_pipes = 2;
+ rdev->config.evergreen.max_tile_pipes = 4;
if (rdev->pdev->device == 0x9648)
rdev->config.evergreen.max_simds = 3;
else if ((rdev->pdev->device == 0x9647) ||
@@ -1842,7 +1844,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+ gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_SUMO2:
rdev->config.evergreen.num_ses = 1;
@@ -1864,7 +1866,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+ gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_BARTS:
rdev->config.evergreen.num_ses = 2;
@@ -1912,7 +1914,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
break;
case CHIP_CAICOS:
rdev->config.evergreen.num_ses = 1;
- rdev->config.evergreen.max_pipes = 4;
+ rdev->config.evergreen.max_pipes = 2;
rdev->config.evergreen.max_tile_pipes = 2;
rdev->config.evergreen.max_simds = 2;
rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
@@ -2032,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG, gb_addr_config);
tmp = gb_addr_config & NUM_PIPES_MASK;
tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2303,22 +2306,20 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin
return radeon_ring_test_lockup(rdev, ring);
}
-static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
+static void evergreen_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -2328,10 +2329,7 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- evergreen_mc_stop(rdev, &save);
- if (evergreen_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);
@@ -2355,15 +2353,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -2373,13 +2370,65 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+}
+
+static void evergreen_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* Disable DMA */
+ tmp = RREG32(DMA_RB_CNTL);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int evergreen_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ evergreen_mc_stop(rdev, &save);
+ if (evergreen_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ evergreen_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ evergreen_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int evergreen_asic_reset(struct radeon_device *rdev)
{
- return evergreen_gpu_soft_reset(rdev);
+ return evergreen_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
/* Interrupts */
@@ -2401,8 +2450,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
cayman_cp_int_cntl_setup(rdev, 1, 0);
cayman_cp_int_cntl_setup(rdev, 2, 0);
+ tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+ WREG32(CAYMAN_DMA1_CNTL, tmp);
} else
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2455,6 +2508,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
+ u32 dma_cntl, dma_cntl1 = 0;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2482,6 +2536,8 @@ int evergreen_irq_set(struct radeon_device *rdev)
afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
+ dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+
if (rdev->family >= CHIP_CAYMAN) {
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2504,6 +2560,19 @@ int evergreen_irq_set(struct radeon_device *rdev)
}
}
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
+ if (rdev->family >= CHIP_CAYMAN) {
+ dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+ if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma1\n");
+ dma_cntl1 |= TRAP_ENABLE;
+ }
+ }
+
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2589,6 +2658,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
} else
WREG32(CP_INT_CNTL, cp_int_cntl);
+
+ WREG32(DMA_CNTL, dma_cntl);
+
+ if (rdev->family >= CHIP_CAYMAN)
+ WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3091,6 +3166,16 @@ restart_ih:
break;
}
break;
+ case 146:
+ case 147:
+ dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ /* reset addr and status */
+ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+ break;
case 176: /* CP_INT in ring buffer */
case 177: /* CP_INT in IB1 */
case 178: /* CP_INT in IB2 */
@@ -3114,9 +3199,19 @@ restart_ih:
} else
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
+ case 244: /* DMA trap event */
+ if (rdev->family >= CHIP_CAYMAN) {
+ DRM_DEBUG("IH: DMA1 trap\n");
+ radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ }
+ break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
@@ -3142,6 +3237,143 @@ restart_ih:
return IRQ_HANDLED;
}
+/**
+ * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (evergreen-SI).
+ */
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ struct radeon_ring *ring = &rdev->ring[fence->ring];
+ u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+ /* write the fence */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ radeon_ring_write(ring, fence->seq);
+ /* generate an interrupt */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+ /* flush HDP */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+}
+
+/**
+ * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (evergreen).
+ */
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * evergreen_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (evergreen-cayman).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int evergreen_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFFF)
+ cur_size_in_dw = 0xFFFFF;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
static int evergreen_startup(struct radeon_device *rdev)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3205,6 +3437,12 @@ static int evergreen_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -3219,12 +3457,23 @@ static int evergreen_startup(struct radeon_device *rdev)
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = evergreen_cp_load_microcode(rdev);
if (r)
return r;
r = evergreen_cp_resume(rdev);
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
r = radeon_ib_pool_init(rdev);
if (r) {
@@ -3271,11 +3520,9 @@ int evergreen_resume(struct radeon_device *rdev)
int evergreen_suspend(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-
r600_audio_fini(rdev);
r700_cp_stop(rdev);
- ring->ready = false;
+ r600_dma_stop(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
evergreen_pcie_gart_disable(rdev);
@@ -3352,6 +3599,9 @@ int evergreen_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -3364,6 +3614,7 @@ int evergreen_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -3391,6 +3642,7 @@ void evergreen_fini(struct radeon_device *rdev)
r600_audio_fini(rdev);
r600_blit_fini(rdev);
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 95e6318b6268..7a445666e71f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
/* height is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
+ /* always assume 8x8 htile */
+ /* align is htile align * 8, htile align vary according to
+ * number of pipe and tile width and nby
+ */
switch (track->npipes) {
case 8:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 64 * 8);
break;
case 4:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 32 * 8);
break;
case 2:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
case 1:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
}
}
/* compute number of htile */
- nbx = nbx / 8;
- nby = nby / 8;
- size = nbx * nby * 4;
+ nbx = nbx >> 3;
+ nby = nby >> 3;
+ /* size must be aligned on npipes * 2K boundary */
+ size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case DB_HTILE_SURFACE:
/* 8x8 only */
track->htile_surface = radeon_get_ib_value(p, idx);
+ /* force 8x8 htile width and height */
+ ib[idx] |= 3;
track->db_dirty = true;
break;
case CB_IMMED0_BASE:
@@ -2232,6 +2245,107 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_CP_DMA:
+ {
+ u32 command, size, info;
+ u64 offset, tmp;
+ if (pkt->count != 4) {
+ DRM_ERROR("bad CP DMA\n");
+ return -EINVAL;
+ }
+ command = radeon_get_ib_value(p, idx+4);
+ size = command & 0x1fffff;
+ info = radeon_get_ib_value(p, idx+1);
+ if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+ (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+ ((((info & 0x00300000) >> 20) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+ ((((info & 0x60000000) >> 29) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+ /* non mem to mem copies requires dw aligned count */
+ if (size % 4) {
+ DRM_ERROR("CP DMA command requires dw count alignment\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ /* GDS is ok */
+ if (((info & 0x60000000) >> 29) != 1) {
+ DRM_ERROR("CP DMA SAS not supported\n");
+ return -EINVAL;
+ }
+ } else {
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ DRM_ERROR("CP DMA SAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ /* src address space is memory */
+ if (((info & 0x60000000) >> 29) == 0) {
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA SRC\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx) +
+ ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx] = offset;
+ ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+ } else if (((info & 0x60000000) >> 29) != 2) {
+ DRM_ERROR("bad CP DMA SRC_SEL\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ /* GDS is ok */
+ if (((info & 0x00300000) >> 20) != 1) {
+ DRM_ERROR("CP DMA DAS not supported\n");
+ return -EINVAL;
+ }
+ } else {
+ /* dst address space is memory */
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ DRM_ERROR("CP DMA DAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ if (((info & 0x00300000) >> 20) == 0) {
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA DST\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx+2) +
+ ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx+2] = offset;
+ ib[idx+3] = upper_32_bits(offset) & 0xff;
+ } else {
+ DRM_ERROR("bad CP DMA DST_SEL\n");
+ return -EINVAL;
+ }
+ }
+ break;
+ }
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2540,6 +2654,35 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_MEM_WRITE:
+ {
+ u64 offset;
+
+ if (pkt->count != 3) {
+ DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+ return -EINVAL;
+ }
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+ return -EINVAL;
+ }
+ offset = radeon_get_ib_value(p, idx+0);
+ offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+ if (offset & 0x7) {
+ DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+ return -EINVAL;
+ }
+ if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+ DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+ offset += reloc->lobj.gpu_offset;
+ ib[idx+0] = offset;
+ ib[idx+1] = upper_32_bits(offset) & 0xff;
+ break;
+ }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -2715,6 +2858,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
return 0;
}
+/*
+ * DMA
+ */
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset, dst2_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 7;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+ src_offset + count, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+ dst_offset + count, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+ p->idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst2_offset = ib[idx+2];
+ dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+ src_offset = ib[idx+3];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
+
/* vm parser */
static bool evergreen_vm_reg_valid(u32 reg)
{
@@ -2724,7 +3316,11 @@ static bool evergreen_vm_reg_valid(u32 reg)
/* check config regs */
switch (reg) {
+ case WAIT_UNTIL:
case GRBM_GFX_INDEX:
+ case CP_STRMOUT_CNTL:
+ case CP_COHER_CNTL:
+ case CP_COHER_SIZE:
case VGT_VTX_VECT_EJECT_REG:
case VGT_CACHE_INVALIDATION:
case VGT_GS_VERTEX_REUSE:
@@ -2840,6 +3436,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
u32 idx = pkt->idx + 1;
u32 idx_value = ib[idx];
u32 start_reg, end_reg, reg, i;
+ u32 command, info;
switch (pkt->opcode) {
case PACKET3_NOP:
@@ -2914,6 +3511,64 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
return -EINVAL;
}
break;
+ case PACKET3_CP_DMA:
+ command = ib[idx + 4];
+ info = ib[idx + 1];
+ if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+ (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+ ((((info & 0x00300000) >> 20) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+ ((((info & 0x60000000) >> 29) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+ /* non mem to mem copies requires dw aligned count */
+ if ((command & 0x1fffff) % 4) {
+ DRM_ERROR("CP DMA command requires dw count alignment\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ if (((info & 0x60000000) >> 29) == 0) {
+ start_reg = idx_value << 2;
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ reg = start_reg;
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ if (((info & 0x00300000) >> 20) == 0) {
+ start_reg = ib[idx + 2];
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ reg = start_reg;
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -2955,3 +3610,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
return ret;
}
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ u32 idx = 0;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+
+ do {
+ header = ib->ptr[idx];
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ if (tiled)
+ idx += count + 7;
+ else
+ idx += count + 3;
+ break;
+ case DMA_PACKET_COPY:
+ if (tiled) {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (idx < ib->length_dw);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index df542f1a5dfb..0bfd0e9e469b 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -45,6 +45,8 @@
#define TURKS_GB_ADDR_CONFIG_GOLDEN 0x02010002
#define CEDAR_GB_ADDR_CONFIG_GOLDEN 0x02010001
#define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001
+#define SUMO_GB_ADDR_CONFIG_GOLDEN 0x02010002
+#define SUMO2_GB_ADDR_CONFIG_GOLDEN 0x02010002
/* Registers */
@@ -91,6 +93,10 @@
#define FB_READ_EN (1 << 0)
#define FB_WRITE_EN (1 << 1)
+#define CP_STRMOUT_CNTL 0x84FC
+
+#define CP_COHER_CNTL 0x85F0
+#define CP_COHER_SIZE 0x85F4
#define CP_COHER_BASE 0x85F8
#define CP_STALLED_STAT1 0x8674
#define CP_STALLED_STAT2 0x8678
@@ -351,6 +357,54 @@
# define AFMT_MPEG_INFO_UPDATE (1 << 10)
#define AFMT_GENERIC0_7 0x7138
+/* DCE4/5 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x5f84 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x5f88 /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x5f8c /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x5f90 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x5f94 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x5f98 /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x5f9c /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x5fa0 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x5fa4 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x5fa8 /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x5fac /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x5fb0 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x5fb4 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x5fb8 /* WMA Pro */
+# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
+/* max channels minus one. 7 = 8 channels */
+# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
+# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
+# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL 0x5e78
+# define AZ_FORCE_CODEC_WAKE (1 << 0)
+# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
+# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
+# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
+# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
+# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
+# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
+# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
+# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
+# define CODEC_HOT_PLUG_ENABLE (1 << 12)
+# define PIN0_AUDIO_ENABLED (1 << 24)
+# define PIN1_AUDIO_ENABLED (1 << 25)
+# define PIN2_AUDIO_ENABLED (1 << 26)
+# define PIN3_AUDIO_ENABLED (1 << 27)
+# define AUDIO_ENABLED (1 << 31)
+
+
#define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -647,6 +701,7 @@
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
#define VM_CONTEXT1_CNTL 0x1414
+#define VM_CONTEXT1_CNTL2 0x1434
#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C
#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C
@@ -668,6 +723,8 @@
#define CACHE_UPDATE_MODE(x) ((x) << 6)
#define VM_L2_STATUS 0x140C
#define L2_BUSY (1 << 0)
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
#define WAIT_UNTIL 0x8040
@@ -685,8 +742,9 @@
#define SOFT_RESET_ROM (1 << 14)
#define SOFT_RESET_SEM (1 << 15)
#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_REGBB (1 << 22)
#define SOFT_RESET_ORB (1 << 23)
/* display watermarks */
@@ -850,6 +908,37 @@
# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
# define DC_HPDx_EN (1 << 28)
+/* ASYNC DMA */
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_TILING_CONFIG 0xD0B8
+
+#define CAYMAN_DMA1_CNTL 0xd82c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
/* PCIE link stuff */
#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */
#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
@@ -947,6 +1036,53 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_INDIRECT_BUFFER 0x32
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ * SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ */
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ * 2 - DATA
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
@@ -1892,4 +2028,15 @@
/* cayman packet3 addition */
#define CAYMAN_PACKET3_DEALLOC_STATE 0x14
+/* DMA regs common on r6xx/r7xx/evergreen/ni */
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+
#endif
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 81e6a568c29d..896f1cbc58a5 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
tmp = gb_addr_config & NUM_PIPES_MASK;
tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -784,10 +786,20 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
/* enable context1-7 */
WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
- WREG32(VM_CONTEXT1_CNTL2, 0);
- WREG32(VM_CONTEXT1_CNTL, 0);
+ WREG32(VM_CONTEXT1_CNTL2, 4);
WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+ VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+ READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+ WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
cayman_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -905,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
}
@@ -1059,7 +1072,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
WREG32(CP_DEBUG, (1 << 27));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
WREG32(SCRATCH_UMSK, 0xff);
@@ -1076,7 +1089,7 @@ static int cayman_cp_resume(struct radeon_device *rdev)
#endif
WREG32(cp_rb_cntl[i], rb_cntl);
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
@@ -1118,22 +1131,195 @@ static int cayman_cp_resume(struct radeon_device *rdev)
return 0;
}
-static int cayman_gpu_soft_reset(struct radeon_device *rdev)
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+ u32 rb_cntl;
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+ /* dma0 */
+ rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+ /* dma1 */
+ rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them. (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring;
+ u32 rb_cntl, dma_cntl;
+ u32 rb_bufsz;
+ u32 reg_offset, wb_offset;
+ int i, r;
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ reg_offset = DMA0_REGISTER_OFFSET;
+ wb_offset = R600_WB_DMA_RPTR_OFFSET;
+ } else {
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ reg_offset = DMA1_REGISTER_OFFSET;
+ wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+ }
+
+ WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+ WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = drm_order(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+ WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(DMA_RB_RPTR + reg_offset, 0);
+ WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+ upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+ WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+ ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+ if (rdev->wb.enabled)
+ rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+ WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+ /* enable DMA IBs */
+ WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+ dma_cntl = RREG32(DMA_CNTL + reg_offset);
+ dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+ WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+ ring->wptr = 0;
+ WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+ ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+ WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+ ring->ready = true;
+
+ r = radeon_ring_test(rdev, ring->idx, ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+ }
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+ return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+ cayman_dma_stop(rdev);
+ radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+ radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
+static void cayman_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1143,19 +1329,7 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(0x14F8));
- dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(0x14D8));
- dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(0x14FC));
- dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(0x14DC));
- evergreen_mc_stop(rdev, &save);
- if (evergreen_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);
@@ -1180,16 +1354,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1199,13 +1371,107 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+
+}
+
+static void cayman_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* dma0 */
+ tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
+
+ /* dma1 */
+ tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+}
+
+static int cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(0x14F8));
+ dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(0x14D8));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(0x14FC));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(0x14DC));
+
+ evergreen_mc_stop(rdev, &save);
+ if (evergreen_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ cayman_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ cayman_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int cayman_asic_reset(struct radeon_device *rdev)
{
- return cayman_gpu_soft_reset(rdev);
+ return cayman_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
+}
+
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ u32 dma_status_reg;
+
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+ else
+ dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+ if (dma_status_reg & DMA_IDLE) {
+ radeon_ring_lockup_update(ring);
+ return false;
+ }
+ /* force ring activities */
+ radeon_ring_force_activity(rdev, ring);
+ return radeon_ring_test_lockup(rdev, ring);
}
static int cayman_startup(struct radeon_device *rdev)
@@ -1289,6 +1555,18 @@ static int cayman_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
+ r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -1303,6 +1581,23 @@ static int cayman_startup(struct radeon_device *rdev)
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = cayman_cp_load_microcode(rdev);
if (r)
return r;
@@ -1310,6 +1605,10 @@ static int cayman_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = cayman_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1354,7 +1653,7 @@ int cayman_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
cayman_cp_enable(rdev, false);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ cayman_dma_stop(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev);
@@ -1421,6 +1720,14 @@ int cayman_init(struct radeon_device *rdev)
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -1433,6 +1740,7 @@ int cayman_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
cayman_cp_fini(rdev);
+ cayman_dma_fini(rdev);
r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev);
@@ -1463,6 +1771,7 @@ void cayman_fini(struct radeon_device *rdev)
{
r600_blit_fini(rdev);
cayman_cp_fini(rdev);
+ cayman_dma_fini(rdev);
r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev);
@@ -1538,30 +1847,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-
- while (count) {
- unsigned ndw = 1 + count * 2;
- if (ndw > 0x3FFF)
- ndw = 0x3FFF;
-
- radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
- radeon_ring_write(ring, pe);
- radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
- for (; ndw > 1; ndw -= 2, --count, pe += 8) {
- uint64_t value = 0;
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- value = radeon_vm_map_gart(rdev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
+ uint64_t value;
+ unsigned ndw;
+
+ if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+ while (count) {
+ ndw = 1 + count * 2;
+ if (ndw > 0x3FFF)
+ ndw = 0x3FFF;
+
+ radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 1; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
addr += incr;
-
- } else if (flags & RADEON_VM_PAGE_VALID) {
- value = addr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ /* for non-physically contiguous pages (system) */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
}
-
- value |= r600_flags;
- radeon_ring_write(ring, value);
- radeon_ring_write(ring, upper_32_bits(value));
}
}
}
@@ -1596,3 +1932,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0);
}
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+ struct radeon_ring *ring = &rdev->ring[ridx];
+
+ if (vm == NULL)
+ return;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+ radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+ /* flush hdp cache */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+
+ /* bits 0-7 are the VM contexts0-7 */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+ radeon_ring_write(ring, 1 << vm->id);
+}
+
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index cbef6815907a..48e5022ee921 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
#define VMID(x) (((x) & 0x7) << 0)
#define SRBM_STATUS 0x0E50
+#define SRBM_SOFT_RESET 0x0E60
+#define SOFT_RESET_BIF (1 << 1)
+#define SOFT_RESET_CG (1 << 2)
+#define SOFT_RESET_DC (1 << 5)
+#define SOFT_RESET_DMA1 (1 << 6)
+#define SOFT_RESET_GRBM (1 << 8)
+#define SOFT_RESET_HDP (1 << 9)
+#define SOFT_RESET_IH (1 << 10)
+#define SOFT_RESET_MC (1 << 11)
+#define SOFT_RESET_RLC (1 << 13)
+#define SOFT_RESET_ROM (1 << 14)
+#define SOFT_RESET_SEM (1 << 15)
+#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
+#define SOFT_RESET_TST (1 << 21)
+#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_ORB (1 << 23)
+
#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
#define REQUEST_TYPE(x) (((x) & 0xf) << 0)
#define RESPONSE_TYPE_MASK 0x000000F0
@@ -80,7 +98,18 @@
#define VM_CONTEXT0_CNTL 0x1410
#define ENABLE_CONTEXT (1 << 0)
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
+#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
+#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
+#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
+#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
+#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
+#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
+#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
+#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
#define VM_CONTEXT1_CNTL 0x1414
#define VM_CONTEXT0_CNTL2 0x1430
#define VM_CONTEXT1_CNTL2 0x1434
@@ -588,5 +617,61 @@
#define PACKET3_SET_APPEND_CNT 0x75
#define PACKET3_ME_WRITE 0x7A
-#endif
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+# define CMD_VMID_FORCE (1 << 31)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
+#define DMA_TILING_CONFIG 0xd0b8
+#define DMA_MODE 0xd0bc
+
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
+ (((vmid) & 0xF) << 20) | \
+ (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 376884f1bcd2..8ff7cac222dc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@ int r100_init(struct radeon_device *rdev)
return 0;
}
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+ bool always_indirect)
{
- if (reg < rdev->rmmio_size)
+ if (reg < rdev->rmmio_size && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg);
else {
+ unsigned long flags;
+ uint32_t ret;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
- return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+
+ return ret;
}
}
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+ bool always_indirect)
{
- if (reg < rdev->rmmio_size)
+ if (reg < rdev->rmmio_size && !always_indirect)
writel(v, ((void __iomem *)rdev->rmmio) + reg);
else {
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}
}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 169ecc9628ea..537e259b3837 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1258,9 +1258,8 @@ void r600_vram_scratch_fini(struct radeon_device *rdev)
* reset, it's up to the caller to determine if the GPU needs one. We
* might add an helper function to check that.
*/
-static int r600_gpu_soft_reset(struct radeon_device *rdev)
+static void r600_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct rv515_mc_save save;
u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
@@ -1280,14 +1279,13 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
u32 tmp;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " R_008010_GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_008010_GRBM_STATUS = 0x%08X\n",
RREG32(R_008010_GRBM_STATUS));
- dev_info(rdev->dev, " R_008014_GRBM_STATUS2=0x%08X\n",
+ dev_info(rdev->dev, " R_008014_GRBM_STATUS2 = 0x%08X\n",
RREG32(R_008014_GRBM_STATUS2));
- dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_000E50_SRBM_STATUS = 0x%08X\n",
RREG32(R_000E50_SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1297,12 +1295,10 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- rv515_mc_stop(rdev, &save);
- if (r600_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
+
/* Check if any of the rendering block is busy and reset it */
if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
(RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
@@ -1332,13 +1328,12 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(R_008020_GRBM_SOFT_RESET);
mdelay(15);
WREG32(R_008020_GRBM_SOFT_RESET, 0);
- /* Wait a little for things to settle down */
- mdelay(1);
- dev_info(rdev->dev, " R_008010_GRBM_STATUS=0x%08X\n",
+
+ dev_info(rdev->dev, " R_008010_GRBM_STATUS = 0x%08X\n",
RREG32(R_008010_GRBM_STATUS));
- dev_info(rdev->dev, " R_008014_GRBM_STATUS2=0x%08X\n",
+ dev_info(rdev->dev, " R_008014_GRBM_STATUS2 = 0x%08X\n",
RREG32(R_008014_GRBM_STATUS2));
- dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_000E50_SRBM_STATUS = 0x%08X\n",
RREG32(R_000E50_SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1348,6 +1343,60 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+
+}
+
+static void r600_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* Disable DMA */
+ tmp = RREG32(DMA_RB_CNTL);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, tmp);
+
+ /* Reset dma */
+ if (rdev->family >= CHIP_RV770)
+ WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+ else
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int r600_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct rv515_mc_save save;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ rv515_mc_stop(rdev, &save);
+ if (r600_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ r600_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ r600_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ mdelay(1);
+
rv515_mc_resume(rdev, &save);
return 0;
}
@@ -1370,9 +1419,34 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
return radeon_ring_test_lockup(rdev, ring);
}
+/**
+ * r600_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (r6xx-evergreen).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ u32 dma_status_reg;
+
+ dma_status_reg = RREG32(DMA_STATUS_REG);
+ if (dma_status_reg & DMA_IDLE) {
+ radeon_ring_lockup_update(ring);
+ return false;
+ }
+ /* force ring activities */
+ radeon_ring_force_activity(rdev, ring);
+ return radeon_ring_test_lockup(rdev, ring);
+}
+
int r600_asic_reset(struct radeon_device *rdev)
{
- return r600_gpu_soft_reset(rdev);
+ return r600_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
u32 r6xx_remap_render_backend(struct radeon_device *rdev,
@@ -1588,6 +1662,7 @@ static void r600_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, tiling_config);
WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+ WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1865,6 +1940,7 @@ void r600_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
int r600_init_microcode(struct radeon_device *rdev)
@@ -2190,6 +2266,128 @@ void r600_cp_fini(struct radeon_device *rdev)
radeon_scratch_free(rdev, ring->rptr_save_reg);
}
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ */
+/**
+ * r600_dma_stop - stop the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine (r6xx-evergreen).
+ */
+void r600_dma_stop(struct radeon_device *rdev)
+{
+ u32 rb_cntl = RREG32(DMA_RB_CNTL);
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, rb_cntl);
+
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+}
+
+/**
+ * r600_dma_resume - setup and start the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_resume(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ u32 rb_cntl, dma_cntl;
+ u32 rb_bufsz;
+ int r;
+
+ /* Reset dma */
+ if (rdev->family >= CHIP_RV770)
+ WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+ else
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+ WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = drm_order(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+ WREG32(DMA_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(DMA_RB_RPTR, 0);
+ WREG32(DMA_RB_WPTR, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(DMA_RB_RPTR_ADDR_HI,
+ upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+ WREG32(DMA_RB_RPTR_ADDR_LO,
+ ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+
+ if (rdev->wb.enabled)
+ rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+ WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+
+ /* enable DMA IBs */
+ WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
+
+ dma_cntl = RREG32(DMA_CNTL);
+ dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+ WREG32(DMA_CNTL, dma_cntl);
+
+ if (rdev->family >= CHIP_RV770)
+ WREG32(DMA_MODE, 1);
+
+ ring->wptr = 0;
+ WREG32(DMA_RB_WPTR, ring->wptr << 2);
+
+ ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+
+ WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+
+ ring->ready = true;
+
+ r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+ return 0;
+}
+
+/**
+ * r600_dma_fini - tear down the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine and free the ring (r6xx-evergreen).
+ */
+void r600_dma_fini(struct radeon_device *rdev)
+{
+ r600_dma_stop(rdev);
+ radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+}
/*
* GPU scratch registers helpers function.
@@ -2246,6 +2444,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
return r;
}
+/**
+ * r600_dma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (r6xx-SI).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_ring_test(struct radeon_device *rdev,
+ struct radeon_ring *ring)
+{
+ unsigned i;
+ int r;
+ void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ u32 tmp;
+
+ if (!ptr) {
+ DRM_ERROR("invalid vram scratch pointer\n");
+ return -EINVAL;
+ }
+
+ tmp = 0xCAFEDEAD;
+ writel(tmp, ptr);
+
+ r = radeon_ring_lock(rdev, ring, 4);
+ if (r) {
+ DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ return r;
+ }
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+ radeon_ring_write(ring, 0xDEADBEEF);
+ radeon_ring_unlock_commit(rdev, ring);
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = readl(ptr);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+ } else {
+ DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+/*
+ * CP fences/semaphores
+ */
+
void r600_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence)
{
@@ -2309,6 +2565,59 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
}
+/*
+ * DMA fences/semaphores
+ */
+
+/**
+ * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (r6xx-r7xx).
+ */
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ struct radeon_ring *ring = &rdev->ring[fence->ring];
+ u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+ /* write the fence */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ radeon_ring_write(ring, lower_32_bits(fence->seq));
+ /* generate an interrupt */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+}
+
+/**
+ * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring wait on or signal
+ * other rings (r6xx-SI).
+ */
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait)
+{
+ u64 addr = semaphore->gpu_addr;
+ u32 s = emit_wait ? 0 : 1;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+}
+
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
@@ -2328,6 +2637,80 @@ int r600_copy_blit(struct radeon_device *rdev,
return 0;
}
+/**
+ * r600_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r6xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+ r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFE)
+ cur_size_in_dw = 0xFFFE;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+ (upper_32_bits(src_offset) & 0xff)));
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size)
@@ -2343,7 +2726,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
static int r600_startup(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+ struct radeon_ring *ring;
int r;
/* enable pcie gen2 link */
@@ -2388,6 +2771,12 @@ static int r600_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -2397,12 +2786,20 @@ static int r600_startup(struct radeon_device *rdev)
}
r600_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
+ if (r)
+ return r;
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
+
r = r600_cp_load_microcode(rdev);
if (r)
return r;
@@ -2410,6 +2807,10 @@ static int r600_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2465,7 +2866,7 @@ int r600_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
r600_cp_stop(rdev);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ r600_dma_stop(rdev);
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
r600_pcie_gart_disable(rdev);
@@ -2538,6 +2939,9 @@ int r600_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -2550,6 +2954,7 @@ int r600_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r600_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -2566,6 +2971,7 @@ void r600_fini(struct radeon_device *rdev)
r600_audio_fini(rdev);
r600_blit_fini(rdev);
r600_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -2668,6 +3074,104 @@ free_scratch:
return r;
}
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ struct radeon_ib ib;
+ unsigned i;
+ int r;
+ void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ u32 tmp = 0;
+
+ if (!ptr) {
+ DRM_ERROR("invalid vram scratch pointer\n");
+ return -EINVAL;
+ }
+
+ tmp = 0xCAFEDEAD;
+ writel(tmp, ptr);
+
+ r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+ if (r) {
+ DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+ ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+ ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+ ib.ptr[3] = 0xDEADBEEF;
+ ib.length_dw = 4;
+
+ r = radeon_ib_schedule(rdev, &ib, NULL);
+ if (r) {
+ radeon_ib_free(rdev, &ib);
+ DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+ return r;
+ }
+ r = radeon_fence_wait(ib.fence, false);
+ if (r) {
+ DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+ return r;
+ }
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = readl(ptr);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+ } else {
+ DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
+ radeon_ib_free(rdev, &ib);
+ return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
/*
* Interrupts
*
@@ -2859,6 +3363,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev)
u32 tmp;
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(DxMODE_INT_MASK, 0);
WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3000,6 +3506,7 @@ int r600_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0;
u32 hdmi0, hdmi1;
u32 d1grph = 0, d2grph = 0;
+ u32 dma_cntl;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3034,12 +3541,19 @@ int r600_irq_set(struct radeon_device *rdev)
hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
}
+ dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("r600_irq_set: sw int\n");
cp_int_cntl |= RB_INT_ENABLE;
cp_int_cntl |= TIME_STAMP_INT_ENABLE;
}
+
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3084,6 +3598,7 @@ int r600_irq_set(struct radeon_device *rdev)
}
WREG32(CP_INT_CNTL, cp_int_cntl);
+ WREG32(DMA_CNTL, dma_cntl);
WREG32(DxMODE_INT_MASK, mode_int);
WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3463,6 +3978,10 @@ restart_ih:
DRM_DEBUG("IH: CP EOP\n");
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 211c40252fe0..03191a56eb44 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
- /* htile widht & nby (8 or 4) make 2 bits number */
- tmp = track->htile_surface & 3;
+ /* always assume 8x8 htile */
/* align is htile align * 8, htile align vary according to
* number of pipe and tile width and nby
*/
switch (track->npipes) {
case 8:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 64 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 64 * 8);
break;
case 4:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 2:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 1:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 8 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 16 * 8);
break;
default:
dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
}
}
/* compute number of htile */
- nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
- nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
- size = nbx * nby * 4;
+ nbx = nbx >> 3;
+ nby = nby >> 3;
+ /* size must be aligned on npipes * 2K boundary */
+ size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case DB_HTILE_SURFACE:
track->htile_surface = radeon_get_ib_value(p, idx);
+ /* force 8x8 htile width and height */
+ ib[idx] |= 3;
track->db_dirty = true;
break;
case SQ_PGM_START_FS:
@@ -1949,6 +1895,78 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_CP_DMA:
+ {
+ u32 command, size;
+ u64 offset, tmp;
+ if (pkt->count != 4) {
+ DRM_ERROR("bad CP DMA\n");
+ return -EINVAL;
+ }
+ command = radeon_get_ib_value(p, idx+4);
+ size = command & 0x1fffff;
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ DRM_ERROR("CP DMA SAS not supported\n");
+ return -EINVAL;
+ } else {
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ DRM_ERROR("CP DMA SAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ /* src address space is memory */
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA SRC\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx) +
+ ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx] = offset;
+ ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ DRM_ERROR("CP DMA DAS not supported\n");
+ return -EINVAL;
+ } else {
+ /* dst address space is memory */
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ DRM_ERROR("CP DMA DAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA DST\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx+2) +
+ ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx+2] = offset;
+ ib[idx+3] = upper_32_bits(offset) & 0xff;
+ }
+ break;
+ }
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2276,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_MEM_WRITE:
+ {
+ u64 offset;
+
+ if (pkt->count != 3) {
+ DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+ return -EINVAL;
+ }
+ offset = radeon_get_ib_value(p, idx+0);
+ offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+ if (offset & 0x7) {
+ DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+ return -EINVAL;
+ }
+ if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+ DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+ offset += reloc->lobj.gpu_offset;
+ ib[idx+0] = offset;
+ ib[idx+1] = upper_32_bits(offset) & 0xff;
+ break;
+ }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -2496,3 +2543,209 @@ void r600_cs_legacy_init(void)
{
r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
}
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc informations
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc)
+{
+ struct radeon_cs_chunk *relocs_chunk;
+ unsigned idx;
+
+ if (p->chunk_relocs_idx == -1) {
+ DRM_ERROR("No relocation chunk !\n");
+ return -EINVAL;
+ }
+ *cs_reloc = NULL;
+ relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ idx = p->dma_reloc_idx;
+ if (idx >= relocs_chunk->length_dw) {
+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+ idx, relocs_chunk->length_dw);
+ return -EINVAL;
+ }
+ *cs_reloc = p->relocs_ptr[idx];
+ p->dma_reloc_idx++;
+ return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc;
+ u32 header, cmd, count, tiled;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 5;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+5];
+ dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+5];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 7;
+ } else {
+ if (p->family >= CHIP_RV770) {
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ } else {
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16;
+ p->idx += 4;
+ }
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ if (p->family < CHIP_RV770) {
+ DRM_ERROR("Constant Fill is 7xx only !\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h
index 2b960cb5c18a..909219b1bf80 100644
--- a/drivers/gpu/drm/radeon/r600_reg.h
+++ b/drivers/gpu/drm/radeon/r600_reg.h
@@ -96,6 +96,15 @@
#define R600_CONFIG_F0_BASE 0x542C
#define R600_CONFIG_APER_SIZE 0x5430
+#define R600_BIF_FB_EN 0x5490
+#define R600_FB_READ_EN (1 << 0)
+#define R600_FB_WRITE_EN (1 << 1)
+
+#define R600_CITF_CNTL 0x200c
+#define R600_BLACKOUT_MASK 0x00000003
+
+#define R700_MC_CITF_CNTL 0x25c0
+
#define R600_ROM_CNTL 0x1600
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f37099ba9..4a53402b1852 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
#define WAIT_2D_IDLECLEAN_bit (1 << 16)
#define WAIT_3D_IDLECLEAN_bit (1 << 17)
+/* async DMA */
+#define DMA_TILING_CONFIG 0x3ec4
+#define DMA_CONFIG 0x3e4c
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
+#define DMA_MODE 0xd0bc
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
+#define DMA_PACKET_NOP 0xf
+
#define IH_RB_CNTL 0x3e00
# define IH_RB_ENABLE (1 << 0)
-# define IH_IB_SIZE(x) ((x) << 1) /* log2 */
+# define IH_RB_SIZE(x) ((x) << 1) /* log2 */
# define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
# define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20
#define SRBM_SOFT_RESET 0xe60
+# define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13)
+# define RV770_SOFT_RESET_DMA (1 << 20)
#define CP_INT_CNTL 0xc124
# define CNTX_BUSY_INT_ENABLE (1 << 19)
@@ -1134,6 +1186,38 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_INDIRECT_BUFFER 0x32
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
# define PACKET3_TC_ACTION_ENA (1 << 23)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54c2e26..34e52304a525 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */
-#define RADEON_NUM_RINGS 3
+#define RADEON_NUM_RINGS 5
/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL
@@ -122,11 +122,21 @@ extern int radeon_lockup_timeout;
#define CAYMAN_RING_TYPE_CP1_INDEX 1
#define CAYMAN_RING_TYPE_CP2_INDEX 2
+/* R600+ has an async dma ring */
+#define R600_RING_TYPE_DMA_INDEX 3
+/* cayman add a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX 4
+
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
#define RADEON_IB_VM_MAX_SIZE (64 << 10)
+/* reset flags */
+#define RADEON_RESET_GFX (1 << 0)
+#define RADEON_RESET_COMPUTE (1 << 1)
+#define RADEON_RESET_DMA (1 << 2)
+
/*
* Errata workarounds.
*/
@@ -220,12 +230,13 @@ struct radeon_fence {
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
int radeon_fence_wait_any(struct radeon_device *rdev,
struct radeon_fence **fences,
bool intr);
@@ -313,6 +324,7 @@ struct radeon_bo {
struct list_head list;
/* Protected by tbo.reserved */
u32 placements[3];
+ u32 busy_placements[3];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
@@ -787,6 +799,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
+/* r600 async dma */
+void r600_dma_stop(struct radeon_device *rdev);
+int r600_dma_resume(struct radeon_device *rdev);
+void r600_dma_fini(struct radeon_device *rdev);
+
+void cayman_dma_stop(struct radeon_device *rdev);
+int cayman_dma_resume(struct radeon_device *rdev);
+void cayman_dma_fini(struct radeon_device *rdev);
+
/*
* CS.
*/
@@ -824,6 +845,7 @@ struct radeon_cs_parser {
struct radeon_cs_reloc *relocs;
struct radeon_cs_reloc **relocs_ptr;
struct list_head validated;
+ unsigned dma_reloc_idx;
/* indices of various chunks */
int chunk_ib_idx;
int chunk_relocs_idx;
@@ -883,7 +905,9 @@ struct radeon_wb {
#define RADEON_WB_CP_RPTR_OFFSET 1024
#define RADEON_WB_CP1_RPTR_OFFSET 1280
#define RADEON_WB_CP2_RPTR_OFFSET 1536
+#define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_EVENT_OFFSET 3072
/**
@@ -1539,6 +1563,8 @@ struct radeon_device {
/* Register mmio */
resource_size_t rmmio_base;
resource_size_t rmmio_size;
+ /* protects concurrent MM_INDEX/DATA based register access */
+ spinlock_t mmio_idx_lock;
void __iomem *rmmio;
radeon_rreg_t mc_rreg;
radeon_wreg_t mc_wreg;
@@ -1614,8 +1640,10 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+ bool always_indirect);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+ bool always_indirect);
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
@@ -1631,9 +1659,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
#define RREG16(reg) readw((rdev->rmmio) + (reg))
#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
-#define RREG32(reg) r100_mm_rreg(rdev, (reg))
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
-#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
+#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
+#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
+#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
+#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
@@ -1658,7 +1688,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
-#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index 10ea17a6b2a6..42433344cb1b 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -69,9 +69,12 @@ static struct radeon_agpmode_quirk radeon_agpmode_quirk_list[] = {
/* Intel 82830 830 Chipset Host Bridge / Mobility M6 LY Needs AGPMode 2 (fdo #17360)*/
{ PCI_VENDOR_ID_INTEL, 0x3575, PCI_VENDOR_ID_ATI, 0x4c59,
PCI_VENDOR_ID_DELL, 0x00e3, 2},
- /* Intel 82852/82855 host bridge / Mobility FireGL 9000 R250 Needs AGPMode 1 (lp #296617) */
+ /* Intel 82852/82855 host bridge / Mobility FireGL 9000 RV250 Needs AGPMode 1 (lp #296617) */
{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4c66,
PCI_VENDOR_ID_DELL, 0x0149, 1},
+ /* Intel 82855PM host bridge / Mobility FireGL 9000 RV250 Needs AGPMode 1 for suspend/resume */
+ { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c66,
+ PCI_VENDOR_ID_IBM, 0x0531, 1},
/* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (deb #467460) */
{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
0x1025, 0x0061, 1},
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b95ab7..9056fafb00ea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -963,10 +972,10 @@ static struct radeon_asic r600_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &r600_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &r600_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1038,10 +1056,10 @@ static struct radeon_asic rs780_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &r600_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &r600_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1113,10 +1140,10 @@ static struct radeon_asic rv770_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &rv770_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &rv770_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1172,6 +1199,15 @@ static struct radeon_asic evergreen_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1188,10 +1224,10 @@ static struct radeon_asic evergreen_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1248,6 +1284,15 @@ static struct radeon_asic sumo_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
},
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
+ }
},
.irq = {
.set = &evergreen_irq_set,
@@ -1263,10 +1308,10 @@ static struct radeon_asic sumo_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1322,6 +1367,15 @@ static struct radeon_asic btc_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1338,10 +1392,10 @@ static struct radeon_asic btc_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1391,7 +1445,7 @@ static struct radeon_asic cayman_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &cayman_vm_set_page,
},
.ring = {
@@ -1427,6 +1481,28 @@ static struct radeon_asic cayman_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
}
},
.irq = {
@@ -1443,10 +1519,10 @@ static struct radeon_asic cayman_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1496,7 +1572,7 @@ static struct radeon_asic trinity_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &cayman_vm_set_page,
},
.ring = {
@@ -1532,6 +1608,28 @@ static struct radeon_asic trinity_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
}
},
.irq = {
@@ -1548,10 +1646,10 @@ static struct radeon_asic trinity_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1601,7 +1699,7 @@ static struct radeon_asic si_asic = {
.vm = {
.init = &si_vm_init,
.fini = &si_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &si_vm_set_page,
},
.ring = {
@@ -1637,6 +1735,28 @@ static struct radeon_asic si_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &si_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = NULL,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &si_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = NULL,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &si_dma_vm_flush,
}
},
.irq = {
@@ -1653,10 +1773,10 @@ static struct radeon_asic si_asic = {
.copy = {
.blit = NULL,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = NULL,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &si_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &si_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5c6be1..15d70e613076 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -263,6 +263,7 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev);
struct rv515_mc_save {
u32 vga_render_control;
u32 vga_hdp_control;
+ bool crtc_enabled[2];
};
int rv515_init(struct radeon_device *rdev);
@@ -303,12 +304,21 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
void r600_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *cp,
struct radeon_semaphore *semaphore,
bool emit_wait);
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait);
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_asic_reset(struct radeon_device *rdev);
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +326,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t offset, uint32_t obj_size);
void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages, struct radeon_fence **fence);
void r600_hpd_init(struct radeon_device *rdev);
void r600_hpd_fini(struct radeon_device *rdev);
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -388,6 +403,10 @@ u32 rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
void r700_cp_stop(struct radeon_device *rdev);
void r700_cp_fini(struct radeon_device *rdev);
+int rv770_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
/*
* evergreen
@@ -416,6 +435,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
int evergreen_irq_set(struct radeon_device *rdev);
int evergreen_irq_process(struct radeon_device *rdev);
extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
extern void evergreen_pm_misc(struct radeon_device *rdev);
extern void evergreen_pm_prepare(struct radeon_device *rdev);
extern void evergreen_pm_finish(struct radeon_device *rdev);
@@ -428,6 +448,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
void evergreen_disable_interrupt_state(struct radeon_device *rdev);
int evergreen_blit_init(struct radeon_device *rdev);
int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib);
+int evergreen_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
/*
* cayman
@@ -449,6 +477,11 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
/* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -476,5 +509,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
uint64_t si_get_gpu_clock(struct radeon_device *rdev);
+int si_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 45b660b27cfc..33a56a09ff10 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -1548,6 +1548,9 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
of_machine_is_compatible("PowerBook6,7")) {
/* ibook */
rdev->mode_info.connector_table = CT_IBOOK;
+ } else if (of_machine_is_compatible("PowerMac3,5")) {
+ /* PowerMac G4 Silver radeon 7500 */
+ rdev->mode_info.connector_table = CT_MAC_G4_SILVER;
} else if (of_machine_is_compatible("PowerMac4,4")) {
/* emac */
rdev->mode_info.connector_table = CT_EMAC;
@@ -2212,6 +2215,54 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
CONNECTOR_OBJECT_ID_SVIDEO,
&hpd);
break;
+ case CT_MAC_G4_SILVER:
+ DRM_INFO("Connector Table: %d (mac g4 silver)\n",
+ rdev->mode_info.connector_table);
+ /* DVI-I - tv dac, int tmds */
+ ddc_i2c = combios_setup_i2c_bus(rdev, DDC_DVI, 0, 0);
+ hpd.hpd = RADEON_HPD_1; /* ??? */
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_DFP1_SUPPORT,
+ 0),
+ ATOM_DEVICE_DFP1_SUPPORT);
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_CRT2_SUPPORT,
+ 2),
+ ATOM_DEVICE_CRT2_SUPPORT);
+ radeon_add_legacy_connector(dev, 0,
+ ATOM_DEVICE_DFP1_SUPPORT |
+ ATOM_DEVICE_CRT2_SUPPORT,
+ DRM_MODE_CONNECTOR_DVII, &ddc_i2c,
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I,
+ &hpd);
+ /* VGA - primary dac */
+ ddc_i2c = combios_setup_i2c_bus(rdev, DDC_VGA, 0, 0);
+ hpd.hpd = RADEON_HPD_NONE;
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_CRT1_SUPPORT,
+ 1),
+ ATOM_DEVICE_CRT1_SUPPORT);
+ radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT1_SUPPORT,
+ DRM_MODE_CONNECTOR_VGA, &ddc_i2c,
+ CONNECTOR_OBJECT_ID_VGA,
+ &hpd);
+ /* TV - TV DAC */
+ ddc_i2c.valid = false;
+ hpd.hpd = RADEON_HPD_NONE;
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_TV1_SUPPORT,
+ 2),
+ ATOM_DEVICE_TV1_SUPPORT);
+ radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+ DRM_MODE_CONNECTOR_SVIDEO,
+ &ddc_i2c,
+ CONNECTOR_OBJECT_ID_SVIDEO,
+ &hpd);
+ break;
default:
DRM_INFO("Connector table: %d (invalid)\n",
rdev->mode_info.connector_table);
@@ -3246,11 +3297,9 @@ static uint32_t combios_detect_ram(struct drm_device *dev, int ram,
while (ram--) {
addr = ram * 1024 * 1024;
/* write to each page */
- WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
- WREG32(RADEON_MM_DATA, 0xdeadbeef);
+ WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef);
/* read back and verify */
- WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
- if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
+ if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef)
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index b884c362a8c2..2399f25ec037 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -741,7 +741,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
ret = connector_status_disconnected;
if (radeon_connector->ddc_bus)
- dret = radeon_ddc_probe(radeon_connector);
+ dret = radeon_ddc_probe(radeon_connector, false);
if (dret) {
radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
@@ -947,7 +947,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
return connector->status;
if (radeon_connector->ddc_bus)
- dret = radeon_ddc_probe(radeon_connector);
+ dret = radeon_ddc_probe(radeon_connector, false);
if (dret) {
radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
@@ -1401,7 +1401,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
if (encoder) {
/* setup ddc on the bridge */
radeon_atom_ext_encoder_setup_ddc(encoder);
- if (radeon_ddc_probe(radeon_connector)) /* try DDC */
+ /* bridge chips are always aux */
+ if (radeon_ddc_probe(radeon_connector, true)) /* try DDC */
ret = connector_status_connected;
else if (radeon_connector->dac_load_detect) { /* try load detection */
struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1419,7 +1420,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
if (radeon_dp_getdpcd(radeon_connector))
ret = connector_status_connected;
} else {
- if (radeon_ddc_probe(radeon_connector))
+ /* try non-aux ddc (DP to DVI/HMDI/etc. adapter) */
+ if (radeon_ddc_probe(radeon_connector, false))
ret = connector_status_connected;
}
}
@@ -1599,7 +1601,7 @@ radeon_add_atom_connector(struct drm_device *dev,
connector->interlace_allowed = true;
connector->doublescan_allowed = true;
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
break;
@@ -1608,13 +1610,13 @@ radeon_add_atom_connector(struct drm_device *dev,
case DRM_MODE_CONNECTOR_HDMIA:
case DRM_MODE_CONNECTOR_HDMIB:
case DRM_MODE_CONNECTOR_DisplayPort:
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
subpixel_order = SubPixelHorizontalRGB;
@@ -1625,14 +1627,14 @@ radeon_add_atom_connector(struct drm_device *dev,
connector->doublescan_allowed = false;
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
break;
case DRM_MODE_CONNECTOR_LVDS:
case DRM_MODE_CONNECTOR_eDP:
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1651,7 +1653,7 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1669,7 +1671,7 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1692,23 +1694,23 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
subpixel_order = SubPixelHorizontalRGB;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
@@ -1732,17 +1734,17 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
@@ -1771,17 +1773,17 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
subpixel_order = SubPixelHorizontalRGB;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
@@ -1806,7 +1808,7 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1819,10 +1821,10 @@ radeon_add_atom_connector(struct drm_device *dev,
drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.tv_std_property,
radeon_atombios_get_tv_info(rdev));
/* no HPD on analog connectors */
@@ -1843,7 +1845,7 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1922,7 +1924,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1940,7 +1942,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1959,7 +1961,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
}
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
@@ -1983,10 +1985,10 @@ radeon_add_legacy_connector(struct drm_device *dev,
*/
if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480)
radeon_connector->dac_load_detect = false;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
radeon_connector->dac_load_detect);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.tv_std_property,
radeon_combios_get_tv_info(rdev));
/* no HPD on analog connectors */
@@ -2002,7 +2004,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 8b2797dc7b64..9143fc45e35b 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -116,20 +116,6 @@ u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index)
}
}
-u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr)
-{
- u32 ret;
-
- if (addr < 0x10000)
- ret = DRM_READ32(dev_priv->mmio, addr);
- else {
- DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr);
- ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA);
- }
-
- return ret;
-}
-
static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
u32 ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..396baba0141a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
return 0;
}
chunk = &p->chunks[p->chunk_relocs_idx];
+ p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
@@ -111,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
} else
p->ring = RADEON_RING_TYPE_GFX_INDEX;
break;
+ case RADEON_CS_RING_DMA:
+ if (p->rdev->family >= CHIP_CAYMAN) {
+ if (p->priority > 0)
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ else
+ p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
+ } else if (p->rdev->family >= CHIP_R600) {
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ } else {
+ return -EINVAL;
+ }
+ break;
}
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 0fe56c9f64bd..ad6df625e8b8 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -66,24 +66,25 @@ static void radeon_hide_cursor(struct drm_crtc *crtc)
struct radeon_device *rdev = crtc->dev->dev_private;
if (ASIC_IS_DCE4(rdev)) {
- WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
- WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
- EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
+ WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset,
+ EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
+ EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
} else if (ASIC_IS_AVIVO(rdev)) {
- WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
- WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+ WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset,
+ (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
} else {
+ u32 reg;
switch (radeon_crtc->crtc_id) {
case 0:
- WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
+ reg = RADEON_CRTC_GEN_CNTL;
break;
case 1:
- WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL);
+ reg = RADEON_CRTC2_GEN_CNTL;
break;
default:
return;
}
- WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN);
+ WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN);
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f888c374..edfc54e41842 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -897,6 +897,25 @@ static void radeon_check_arguments(struct radeon_device *rdev)
}
/**
+ * radeon_switcheroo_quirk_long_wakeup - return true if longer d3 delay is
+ * needed for waking up.
+ *
+ * @pdev: pci dev pointer
+ */
+static bool radeon_switcheroo_quirk_long_wakeup(struct pci_dev *pdev)
+{
+
+ /* 6600m in a macbook pro */
+ if (pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE &&
+ pdev->subsystem_device == 0x00e2) {
+ printk(KERN_INFO "radeon: quirking longer d3 wakeup delay\n");
+ return true;
+ }
+
+ return false;
+}
+
+/**
* radeon_switcheroo_set_state - set switcheroo state
*
* @pdev: pci dev pointer
@@ -910,10 +929,19 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
struct drm_device *dev = pci_get_drvdata(pdev);
pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
if (state == VGA_SWITCHEROO_ON) {
+ unsigned d3_delay = dev->pdev->d3_delay;
+
printk(KERN_INFO "radeon: switched on\n");
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ if (d3_delay < 20 && radeon_switcheroo_quirk_long_wakeup(pdev))
+ dev->pdev->d3_delay = 20;
+
radeon_resume_kms(dev);
+
+ dev->pdev->d3_delay = d3_delay;
+
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
} else {
@@ -1059,6 +1087,7 @@ int radeon_device_init(struct radeon_device *rdev,
/* Registers mapping */
/* TODO: block userspace mapping of io register */
+ spin_lock_init(&rdev->mmio_idx_lock);
rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
@@ -1163,6 +1192,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
+ bool force_completion = false;
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1205,8 +1235,16 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
mutex_lock(&rdev->ring_lock);
/* wait for gpu to finish processing current batch */
- for (i = 0; i < RADEON_NUM_RINGS; i++)
- radeon_fence_wait_empty_locked(rdev, i);
+ for (i = 0; i < RADEON_NUM_RINGS; i++) {
+ r = radeon_fence_wait_empty_locked(rdev, i);
+ if (r) {
+ /* delay GPU reset to resume */
+ force_completion = true;
+ }
+ }
+ if (force_completion) {
+ radeon_fence_driver_force_completion(rdev);
+ }
mutex_unlock(&rdev->ring_lock);
radeon_save_bios_scratch_regs(rdev);
@@ -1337,7 +1375,6 @@ retry:
}
radeon_restore_bios_scratch_regs(rdev);
- drm_helper_resume_force_mode(rdev->ddev);
if (!r) {
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1357,11 +1394,14 @@ retry:
}
}
} else {
+ radeon_fence_driver_force_completion(rdev);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
kfree(ring_data[i]);
}
}
+ drm_helper_resume_force_mode(rdev->ddev);
+
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
if (r) {
/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a6015727..1da2386d7cf7 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -378,8 +378,12 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
work->old_rbo = rbo;
obj = new_radeon_fb->obj;
rbo = gem_to_radeon_bo(obj);
+
+ spin_lock(&rbo->tbo.bdev->fence_lock);
if (rbo->tbo.sync_obj)
work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
+ spin_unlock(&rbo->tbo.bdev->fence_lock);
+
INIT_WORK(&work->work, radeon_unpin_work_func);
/* We borrow the event spin lock for protecting unpin_work */
@@ -695,10 +699,15 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
if (radeon_connector->router.ddc_valid)
radeon_router_select_ddc_port(radeon_connector);
- if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
- (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) ||
- (radeon_connector_encoder_get_dp_bridge_encoder_id(&radeon_connector->base) !=
- ENCODER_OBJECT_ID_NONE)) {
+ if (radeon_connector_encoder_get_dp_bridge_encoder_id(&radeon_connector->base) !=
+ ENCODER_OBJECT_ID_NONE) {
+ struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+
+ if (dig->dp_i2c_bus)
+ radeon_connector->edid = drm_get_edid(&radeon_connector->base,
+ &dig->dp_i2c_bus->adapter);
+ } else if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+ (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)) {
struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8c1a83c6eb07..dff6cf77f953 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -65,9 +65,13 @@
* 2.22.0 - r600 only: RESOLVE_BOX allowed
* 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880
* 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures
+ * 2.25.0 - eg+: new info request for num SE and num SH
+ * 2.26.0 - r600-eg: fix htile size computation
+ * 2.27.0 - r600-SI: Add CS ioctl support for async DMA
+ * 2.28.0 - r600-eg: Add MEM_WRITE packet support
*/
#define KMS_DRIVER_MAJOR 2
-#define KMS_DRIVER_MINOR 24
+#define KMS_DRIVER_MINOR 28
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
@@ -302,8 +306,8 @@ static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
return 0;
}
-static int __devinit
-radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int radeon_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
int ret;
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index a1b59ca96d01..e7fdf163a8ca 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -366,7 +366,6 @@ extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file
extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
-extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr);
extern void radeon_freelist_reset(struct drm_device * dev);
extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2c2740..34356252567a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
* Returns 0 if the fences have passed, error for all other cases.
* Caller must hold ring lock.
*/
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
{
uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+ int r;
- while(1) {
- int r;
- r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+ r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+ if (r) {
if (r == -EDEADLK) {
- mutex_unlock(&rdev->ring_lock);
- r = radeon_gpu_reset(rdev);
- mutex_lock(&rdev->ring_lock);
- if (!r)
- continue;
- }
- if (r) {
- dev_err(rdev->dev, "error waiting for ring to become"
- " idle (%d)\n", r);
+ return -EDEADLK;
}
- return;
+ dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+ ring, r);
}
+ return 0;
}
/**
@@ -772,7 +766,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
int r;
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
- if (rdev->wb.use_event) {
+ if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
rdev->fence_drv[ring].scratch_reg = 0;
index = R600_WB_EVENT_OFFSET + ring * 4;
} else {
@@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
*/
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
- int ring;
+ int ring, r;
mutex_lock(&rdev->ring_lock);
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
if (!rdev->fence_drv[ring].initialized)
continue;
- radeon_fence_wait_empty_locked(rdev, ring);
+ r = radeon_fence_wait_empty_locked(rdev, ring);
+ if (r) {
+ /* no need to trigger GPU reset as we are unloading */
+ radeon_fence_driver_force_completion(rdev);
+ }
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
rdev->fence_drv[ring].initialized = false;
@@ -868,6 +866,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
mutex_unlock(&rdev->ring_lock);
}
+/**
+ * radeon_fence_driver_force_completion - force all fence waiter to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure make sure no process keep waiting on fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+ int ring;
+
+ for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+ if (!rdev->fence_drv[ring].initialized)
+ continue;
+ radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+ }
+}
+
/*
* Fence debugfs
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 8690be757d80..6e24f84755b5 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -1237,7 +1237,6 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
{
struct radeon_bo_va *bo_va;
- BUG_ON(!radeon_bo_is_reserved(bo));
list_for_each_entry(bo_va, &bo->va, bo_list) {
bo_va->valid = false;
}
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index c5bddd630eb9..fc60b74ee304 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -39,7 +39,7 @@ extern u32 radeon_atom_hw_i2c_func(struct i2c_adapter *adap);
* radeon_ddc_probe
*
*/
-bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
+bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool use_aux)
{
u8 out = 0x0;
u8 buf[8];
@@ -63,7 +63,13 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
if (radeon_connector->router.ddc_valid)
radeon_router_select_ddc_port(radeon_connector);
- ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
+ if (use_aux) {
+ struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+ ret = i2c_transfer(&dig->dp_i2c_bus->adapter, msgs, 2);
+ } else {
+ ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
+ }
+
if (ret != 2)
/* Couldn't find an accessible DDC on this connector */
return false;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dc781c49b96b..9c312f9afb68 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -361,6 +361,22 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return -EINVAL;
}
break;
+ case RADEON_INFO_MAX_SE:
+ if (rdev->family >= CHIP_TAHITI)
+ value = rdev->config.si.max_shader_engines;
+ else if (rdev->family >= CHIP_CAYMAN)
+ value = rdev->config.cayman.max_shader_engines;
+ else if (rdev->family >= CHIP_CEDAR)
+ value = rdev->config.evergreen.num_ses;
+ else
+ value = 1;
+ break;
+ case RADEON_INFO_MAX_SH_PER_SE:
+ if (rdev->family >= CHIP_TAHITI)
+ value = rdev->config.si.max_sh_per_se;
+ else
+ return -EINVAL;
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index d818b503b42f..4003f5a68c09 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -209,7 +209,8 @@ enum radeon_connector_table {
CT_RN50_POWER,
CT_MAC_X800,
CT_MAC_G5_9600,
- CT_SAM440EP
+ CT_SAM440EP,
+ CT_MAC_G4_SILVER
};
enum radeon_dvo_chip {
@@ -558,7 +559,7 @@ extern void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c,
u8 val);
extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector);
extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector);
-extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool use_aux);
extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 7c4b4bb05a36..883c95d8d90f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -84,17 +84,34 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
rbo->placement.fpfn = 0;
rbo->placement.lpfn = 0;
rbo->placement.placement = rbo->placements;
- rbo->placement.busy_placement = rbo->placements;
if (domain & RADEON_GEM_DOMAIN_VRAM)
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
- if (domain & RADEON_GEM_DOMAIN_GTT)
- rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- if (domain & RADEON_GEM_DOMAIN_CPU)
- rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+ if (domain & RADEON_GEM_DOMAIN_GTT) {
+ if (rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+ } else {
+ rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+ }
+ }
+ if (domain & RADEON_GEM_DOMAIN_CPU) {
+ if (rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
+ } else {
+ rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+ }
+ }
if (!c)
rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
rbo->placement.num_placement = c;
+
+ c = 0;
+ rbo->placement.busy_placement = rbo->busy_placements;
+ if (rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->busy_placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+ } else {
+ rbo->busy_placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+ }
rbo->placement.num_busy_placement = c;
}
@@ -240,7 +257,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
}
for (i = 0; i < bo->placement.num_placement; i++)
bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (likely(r == 0)) {
bo->pin_count = 1;
if (gpu_addr != NULL)
@@ -269,7 +286,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i < bo->placement.num_placement; i++)
bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (unlikely(r != 0))
dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
return r;
@@ -340,7 +357,6 @@ int radeon_bo_list_validate(struct list_head *head)
{
struct radeon_bo_list *lobj;
struct radeon_bo *bo;
- u32 domain;
int r;
r = ttm_eu_reserve_buffers(head);
@@ -350,17 +366,9 @@ int radeon_bo_list_validate(struct list_head *head)
list_for_each_entry(lobj, head, tv.head) {
bo = lobj->bo;
if (!bo->pin_count) {
- domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
-
- retry:
- radeon_ttm_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement,
- true, false, false);
+ true, false);
if (unlikely(r)) {
- if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
- domain |= RADEON_GEM_DOMAIN_GTT;
- goto retry;
- }
return r;
}
}
@@ -520,7 +528,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
bool force_drop)
{
- BUG_ON(!radeon_bo_is_reserved(bo));
+ BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop);
if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
return 0;
@@ -575,7 +583,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
- r = ttm_bo_validate(bo, &rbo->placement, false, true, false);
+ r = ttm_bo_validate(bo, &rbo->placement, false, false);
if (unlikely(r != 0))
return r;
offset = bo->mem.start << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index aa14dbb7e4fb..0bfa656aa87d 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -234,7 +234,7 @@ static void radeon_set_power_state(struct radeon_device *rdev)
static void radeon_pm_set_clocks(struct radeon_device *rdev)
{
- int i;
+ int i, r;
/* no need to take locks, etc. if nothing's going to change */
if ((rdev->pm.requested_clock_mode_index == rdev->pm.current_clock_mode_index) &&
@@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
/* wait for the rings to drain */
for (i = 0; i < RADEON_NUM_RINGS; i++) {
struct radeon_ring *ring = &rdev->ring[i];
- if (ring->ready)
- radeon_fence_wait_empty_locked(rdev, i);
+ if (!ring->ready) {
+ continue;
+ }
+ r = radeon_fence_wait_empty_locked(rdev, i);
+ if (r) {
+ /* needs a GPU reset dont reset here */
+ mutex_unlock(&rdev->ring_lock);
+ up_write(&rdev->pm.mclk_lock);
+ mutex_unlock(&rdev->ddev->struct_mutex);
+ return;
+ }
}
radeon_unmap_vram_bos(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index e09521858f64..26c23bb651c6 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -194,6 +194,7 @@ struct drm_gem_object *radeon_gem_prime_import(struct drm_device *dev,
bo = dma_buf->priv;
if (bo->gem_base.dev == dev) {
drm_gem_object_reference(&bo->gem_base);
+ dma_buf_put(dma_buf);
return &bo->gem_base;
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 47634f27f2e5..141f2b6a9cf2 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -459,7 +459,7 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *r
*
* @ring: radeon_ring structure holding ring information
*
- * Reset the driver's copy of the wtpr (all asics).
+ * Reset the driver's copy of the wptr (all asics).
*/
void radeon_ring_undo(struct radeon_ring *ring)
{
@@ -503,7 +503,7 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *
}
/**
- * radeon_ring_force_activity - update lockup variables
+ * radeon_ring_lockup_update - update lockup variables
*
* @ring: radeon_ring structure holding ring information
*
@@ -770,22 +770,28 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
int ridx = *(int*)node->info_ent->data;
struct radeon_ring *ring = &rdev->ring[ridx];
unsigned count, i, j;
+ u32 tmp;
radeon_ring_free_size(rdev, ring);
count = (ring->ring_size / 4) - ring->ring_free_dw;
- seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
- seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
+ tmp = RREG32(ring->wptr_reg) >> ring->ptr_reg_shift;
+ seq_printf(m, "wptr(0x%04x): 0x%08x [%5d]\n", ring->wptr_reg, tmp, tmp);
+ tmp = RREG32(ring->rptr_reg) >> ring->ptr_reg_shift;
+ seq_printf(m, "rptr(0x%04x): 0x%08x [%5d]\n", ring->rptr_reg, tmp, tmp);
if (ring->rptr_save_reg) {
seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
RREG32(ring->rptr_save_reg));
}
- seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
- seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
+ seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr);
+ seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", ring->rptr, ring->rptr);
seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
seq_printf(m, "%u dwords in ring\n", count);
- i = ring->rptr;
- for (j = 0; j <= count; j++) {
- seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
+ /* print 8 dw before current rptr as often it's the last executed
+ * packet that is the root issue
+ */
+ i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
+ for (j = 0; j <= (count + 32); j++) {
+ seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]);
i = (i + 1) & ring->ptr_mask;
}
return 0;
@@ -794,11 +800,15 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
+static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
static struct drm_info_list radeon_debugfs_ring_info_list[] = {
{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
+ {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
+ {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
};
static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 587c09a00ba2..fda09c9ea689 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -26,16 +26,31 @@
#include "radeon_reg.h"
#include "radeon.h"
+#define RADEON_TEST_COPY_BLIT 1
+#define RADEON_TEST_COPY_DMA 0
+
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
-void radeon_test_moves(struct radeon_device *rdev)
+static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
{
struct radeon_bo *vram_obj = NULL;
struct radeon_bo **gtt_obj = NULL;
struct radeon_fence *fence = NULL;
uint64_t gtt_addr, vram_addr;
unsigned i, n, size;
- int r;
+ int r, ring;
+
+ switch (flag) {
+ case RADEON_TEST_COPY_DMA:
+ ring = radeon_copy_dma_ring_index(rdev);
+ break;
+ case RADEON_TEST_COPY_BLIT:
+ ring = radeon_copy_blit_ring_index(rdev);
+ break;
+ default:
+ DRM_ERROR("Unknown copy method\n");
+ return;
+ }
size = 1024 * 1024;
@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]);
- r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ if (ring == R600_RING_TYPE_DMA_INDEX)
+ r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ else
+ r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
goto out_cleanup;
@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj);
- r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ if (ring == R600_RING_TYPE_DMA_INDEX)
+ r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ else
+ r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
goto out_cleanup;
@@ -223,6 +244,14 @@ out_cleanup:
}
}
+void radeon_test_moves(struct radeon_device *rdev)
+{
+ if (rdev->asic->copy.dma)
+ radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
+ if (rdev->asic->copy.blit)
+ radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
+}
+
void radeon_test_ring_sync(struct radeon_device *rdev,
struct radeon_ring *ringA,
struct radeon_ring *ringB)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 563c8edcb03b..1d8ff2f850ba 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -216,7 +216,7 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
}
static int radeon_move_blit(struct ttm_buffer_object *bo,
- bool evict, int no_wait_reserve, bool no_wait_gpu,
+ bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
{
@@ -266,14 +266,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
&fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
- evict, no_wait_reserve, no_wait_gpu, new_mem);
+ evict, no_wait_gpu, new_mem);
radeon_fence_unref(&fence);
return r;
}
static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -294,7 +294,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
placement.busy_placement = &placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
- interruptible, no_wait_reserve, no_wait_gpu);
+ interruptible, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -308,11 +308,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
if (unlikely(r)) {
goto out_cleanup;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem);
+ r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
@@ -320,7 +320,7 @@ out_cleanup:
static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -340,15 +340,16 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+ r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
+ interruptible, no_wait_gpu);
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+ r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@@ -359,7 +360,7 @@ out_cleanup:
static int radeon_bo_move(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -388,18 +389,18 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
r = radeon_move_vram_ram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
+ no_wait_gpu, new_mem);
} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
new_mem->mem_type == TTM_PL_VRAM) {
r = radeon_move_ram_vram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
+ no_wait_gpu, new_mem);
} else {
- r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+ r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
}
if (r) {
memcpy:
- r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
}
return r;
}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 785d09590b24..2bb6d0e84b3d 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -40,6 +40,12 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev);
static void rv515_gpu_init(struct radeon_device *rdev);
int rv515_mc_wait_for_idle(struct radeon_device *rdev);
+static const u32 crtc_offsets[2] =
+{
+ 0,
+ AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL
+};
+
void rv515_debugfs(struct radeon_device *rdev)
{
if (r100_debugfs_rbbm_init(rdev)) {
@@ -281,30 +287,114 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
{
+ u32 crtc_enabled, tmp, frame_count, blackout;
+ int i, j;
+
save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
- /* Stop all video */
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
+ /* disable VGA render */
WREG32(R_000300_VGA_RENDER_CONTROL, 0);
- WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
- WREG32(R_006080_D1CRTC_CONTROL, 0);
- WREG32(R_006880_D2CRTC_CONTROL, 0);
- WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
- WREG32(R_000330_D1VGA_CONTROL, 0);
- WREG32(R_000338_D2VGA_CONTROL, 0);
+ /* blank the display controllers */
+ for (i = 0; i < rdev->num_crtc; i++) {
+ crtc_enabled = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]) & AVIVO_CRTC_EN;
+ if (crtc_enabled) {
+ save->crtc_enabled[i] = true;
+ tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+ if (!(tmp & AVIVO_CRTC_DISP_READ_REQUEST_DISABLE)) {
+ radeon_wait_for_vblank(rdev, i);
+ tmp |= AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+ WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+ }
+ /* wait for the next frame */
+ frame_count = radeon_get_vblank_counter(rdev, i);
+ for (j = 0; j < rdev->usec_timeout; j++) {
+ if (radeon_get_vblank_counter(rdev, i) != frame_count)
+ break;
+ udelay(1);
+ }
+ } else {
+ save->crtc_enabled[i] = false;
+ }
+ }
+
+ radeon_mc_wait_for_idle(rdev);
+
+ if (rdev->family >= CHIP_R600) {
+ if (rdev->family >= CHIP_RV770)
+ blackout = RREG32(R700_MC_CITF_CNTL);
+ else
+ blackout = RREG32(R600_CITF_CNTL);
+ if ((blackout & R600_BLACKOUT_MASK) != R600_BLACKOUT_MASK) {
+ /* Block CPU access */
+ WREG32(R600_BIF_FB_EN, 0);
+ /* blackout the MC */
+ blackout |= R600_BLACKOUT_MASK;
+ if (rdev->family >= CHIP_RV770)
+ WREG32(R700_MC_CITF_CNTL, blackout);
+ else
+ WREG32(R600_CITF_CNTL, blackout);
+ }
+ }
}
void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
{
- WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006910_D2GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006918_D2GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
- /* Unlock host access */
+ u32 tmp, frame_count;
+ int i, j;
+
+ /* update crtc base addresses */
+ for (i = 0; i < rdev->num_crtc; i++) {
+ if (rdev->family >= CHIP_RV770) {
+ if (i == 1) {
+ WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ } else {
+ WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ }
+ }
+ WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
+ (u32)rdev->mc.vram_start);
+ WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
+ (u32)rdev->mc.vram_start);
+ }
+ WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start);
+
+ if (rdev->family >= CHIP_R600) {
+ /* unblackout the MC */
+ if (rdev->family >= CHIP_RV770)
+ tmp = RREG32(R700_MC_CITF_CNTL);
+ else
+ tmp = RREG32(R600_CITF_CNTL);
+ tmp &= ~R600_BLACKOUT_MASK;
+ if (rdev->family >= CHIP_RV770)
+ WREG32(R700_MC_CITF_CNTL, tmp);
+ else
+ WREG32(R600_CITF_CNTL, tmp);
+ /* allow CPU access */
+ WREG32(R600_BIF_FB_EN, R600_FB_READ_EN | R600_FB_WRITE_EN);
+ }
+
+ for (i = 0; i < rdev->num_crtc; i++) {
+ if (save->crtc_enabled[i]) {
+ tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+ tmp &= ~AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+ WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+ /* wait for the next frame */
+ frame_count = radeon_get_vblank_counter(rdev, i);
+ for (j = 0; j < rdev->usec_timeout; j++) {
+ if (radeon_get_vblank_counter(rdev, i) != frame_count)
+ break;
+ udelay(1);
+ }
+ }
+ }
+ /* Unlock vga access */
WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
mdelay(1);
WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a08c8e5..1b2444f4d8f4 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, gb_tiling_config);
WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+ WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
+ WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
WREG32(CGTS_SYS_TCC_DISABLE, 0);
WREG32(CGTS_TCC_DISABLE, 0);
@@ -884,9 +887,83 @@ static int rv770_mc_init(struct radeon_device *rdev)
return 0;
}
+/**
+ * rv770_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int rv770_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFF)
+ cur_size_in_dw = 0xFFFF;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
static int rv770_startup(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+ struct radeon_ring *ring;
int r;
/* enable pcie gen2 link */
@@ -932,6 +1009,12 @@ static int rv770_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -941,11 +1024,20 @@ static int rv770_startup(struct radeon_device *rdev)
}
r600_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = rv770_cp_load_microcode(rdev);
if (r)
return r;
@@ -953,6 +1045,10 @@ static int rv770_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1091,7 @@ int rv770_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
r700_cp_stop(rdev);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ r600_dma_stop(rdev);
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1162,9 @@ int rv770_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -1078,6 +1177,7 @@ int rv770_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -1093,6 +1193,7 @@ void rv770_fini(struct radeon_device *rdev)
{
r600_blit_fini(rdev);
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index b0adfc595d75..20e29d23d348 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
#define PIPE_TILING__SHIFT 1
#define PIPE_TILING__MASK 0x0000000e
+#define DMA_TILING_CONFIG 0x3ec8
+#define DMA_TILING_CONFIG2 0xd0b8
+
#define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -358,6 +361,26 @@
#define WAIT_UNTIL 0x8040
+/* async DMA */
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
+
#define SRBM_STATUS 0x0E50
/* DCE 3.2 HDMI */
@@ -551,6 +574,54 @@
#define HDMI_OFFSET0 (0x7400 - 0x7400)
#define HDMI_OFFSET1 (0x7800 - 0x7400)
+/* DCE3.2 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x71c8 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x71cc /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x71d0 /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x71d4 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x71d8 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x71dc /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x71e0 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x71e4 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x71e8 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x71ec /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x71f0 /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x71f4 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x71f8 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x71fc /* WMA Pro */
+# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
+/* max channels minus one. 7 = 8 channels */
+# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
+# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
+# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL 0x7300
+# define AZ_FORCE_CODEC_WAKE (1 << 0)
+# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
+# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
+# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
+# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
+# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
+# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
+# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
+# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
+# define CODEC_HOT_PLUG_ENABLE (1 << 12)
+# define PIN0_AUDIO_ENABLED (1 << 24)
+# define PIN1_AUDIO_ENABLED (1 << 25)
+# define PIN2_AUDIO_ENABLED (1 << 26)
+# define PIN3_AUDIO_ENABLED (1 << 27)
+# define AUDIO_ENABLED (1 << 31)
+
+
#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110
#define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914
#define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index b0db712060fb..3240a3d64f30 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
si_tiling_mode_table_init(rdev);
@@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
}
udelay(50);
}
@@ -2007,7 +2012,7 @@ static int si_cp_resume(struct radeon_device *rdev)
ring->wptr = 0;
WREG32(CP_RB0_WPTR, ring->wptr);
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
@@ -2040,7 +2045,7 @@ static int si_cp_resume(struct radeon_device *rdev)
ring->wptr = 0;
WREG32(CP_RB1_WPTR, ring->wptr);
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
@@ -2066,7 +2071,7 @@ static int si_cp_resume(struct radeon_device *rdev)
ring->wptr = 0;
WREG32(CP_RB2_WPTR, ring->wptr);
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
@@ -2121,15 +2126,13 @@ bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
return radeon_ring_test_lockup(rdev, ring);
}
-static int si_gpu_soft_reset(struct radeon_device *rdev)
+static void si_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
RREG32(GRBM_STATUS));
dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
@@ -2140,10 +2143,7 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
RREG32(GRBM_STATUS_SE1));
dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
RREG32(SRBM_STATUS));
- evergreen_mc_stop(rdev, &save);
- if (radeon_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
@@ -2168,8 +2168,7 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
+
dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
RREG32(GRBM_STATUS));
dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
@@ -2180,13 +2179,75 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
RREG32(GRBM_STATUS_SE1));
dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
RREG32(SRBM_STATUS));
+}
+
+static void si_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* dma0 */
+ tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
+
+ /* dma1 */
+ tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+
+ evergreen_mc_stop(rdev, &save);
+ if (radeon_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ si_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ si_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int si_asic_reset(struct radeon_device *rdev)
{
- return si_gpu_soft_reset(rdev);
+ return si_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
/* MC */
@@ -2426,9 +2487,20 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
/* enable context1-15 */
WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
- WREG32(VM_CONTEXT1_CNTL2, 0);
+ WREG32(VM_CONTEXT1_CNTL2, 4);
WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+ VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+ READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+ WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
si_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -2474,6 +2546,7 @@ static bool si_vm_reg_valid(u32 reg)
/* check config regs */
switch (reg) {
case GRBM_GFX_INDEX:
+ case CP_STRMOUT_CNTL:
case VGT_VTX_VECT_EJECT_REG:
case VGT_CACHE_INVALIDATION:
case VGT_ESGS_RING_SIZE:
@@ -2533,6 +2606,7 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
u32 idx = pkt->idx + 1;
u32 idx_value = ib[idx];
u32 start_reg, end_reg, reg, i;
+ u32 command, info;
switch (pkt->opcode) {
case PACKET3_NOP:
@@ -2632,6 +2706,52 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
return -EINVAL;
}
break;
+ case PACKET3_CP_DMA:
+ command = ib[idx + 4];
+ info = ib[idx + 1];
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ if (((info & 0x60000000) >> 29) == 0) {
+ start_reg = idx_value << 2;
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ reg = start_reg;
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ if (((info & 0x00300000) >> 20) == 0) {
+ start_reg = ib[idx + 2];
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ reg = start_reg;
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ break;
default:
DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
return -EINVAL;
@@ -2808,30 +2928,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-
- while (count) {
- unsigned ndw = 2 + count * 2;
- if (ndw > 0x3FFE)
- ndw = 0x3FFE;
-
- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
- radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(1)));
- radeon_ring_write(ring, pe);
- radeon_ring_write(ring, upper_32_bits(pe));
- for (; ndw > 2; ndw -= 2, --count, pe += 8) {
- uint64_t value;
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- value = radeon_vm_map_gart(rdev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & RADEON_VM_PAGE_VALID)
- value = addr;
- else
- value = 0;
- addr += incr;
- value |= r600_flags;
- radeon_ring_write(ring, value);
- radeon_ring_write(ring, upper_32_bits(value));
+ uint64_t value;
+ unsigned ndw;
+
+ if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+ while (count) {
+ ndw = 2 + count * 2;
+ if (ndw > 0x3FFE)
+ ndw = 0x3FFE;
+
+ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
+ radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(1)));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe));
+ for (; ndw > 2; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
+ addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ /* DMA */
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ /* for non-physically contiguous pages (system) */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
+ addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ if (flags & RADEON_VM_PAGE_VALID)
+ value = addr;
+ else
+ value = 0;
+ /* for physically contiguous pages (vram) */
+ radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
+ radeon_ring_write(ring, pe); /* dst addr */
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ radeon_ring_write(ring, r600_flags); /* mask */
+ radeon_ring_write(ring, 0);
+ radeon_ring_write(ring, value); /* value */
+ radeon_ring_write(ring, upper_32_bits(value));
+ radeon_ring_write(ring, incr); /* increment size */
+ radeon_ring_write(ring, 0);
+ pe += ndw * 4;
+ addr += (ndw / 2) * incr;
+ count -= ndw / 2;
+ }
}
}
}
@@ -2879,6 +3055,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, 0x0);
}
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+ struct radeon_ring *ring = &rdev->ring[ridx];
+
+ if (vm == NULL)
+ return;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ if (vm->id < 8) {
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+ } else {
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+ }
+ radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+ /* flush hdp cache */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+
+ /* bits 0-7 are the VM contexts0-7 */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+ radeon_ring_write(ring, 1 << vm->id);
+}
+
/*
* RLC
*/
@@ -3047,6 +3249,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
WREG32(CP_INT_CNTL_RING1, 0);
WREG32(CP_INT_CNTL_RING2, 0);
+ tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
+ tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -3166,6 +3372,7 @@ int si_irq_set(struct radeon_device *rdev)
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
+ u32 dma_cntl, dma_cntl1;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3186,6 +3393,9 @@ int si_irq_set(struct radeon_device *rdev)
hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
+ dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -3199,6 +3409,15 @@ int si_irq_set(struct radeon_device *rdev)
DRM_DEBUG("si_irq_set: sw int cp2\n");
cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
}
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("si_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
+ if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+ DRM_DEBUG("si_irq_set: sw int dma1\n");
+ dma_cntl1 |= TRAP_ENABLE;
+ }
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("si_irq_set: vblank 0\n");
@@ -3258,6 +3477,9 @@ int si_irq_set(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
+
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3683,6 +3905,16 @@ restart_ih:
break;
}
break;
+ case 146:
+ case 147:
+ dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ /* reset addr and status */
+ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+ break;
case 176: /* RINGID0 CP_INT */
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
@@ -3706,9 +3938,17 @@ restart_ih:
break;
}
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
+ case 244: /* DMA trap event */
+ DRM_DEBUG("IH: DMA1 trap\n");
+ radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
@@ -3732,6 +3972,80 @@ restart_ih:
return IRQ_HANDLED;
}
+/**
+ * si_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (SI).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int si_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_bytes, cur_size_in_bytes;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+ num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_bytes = size_in_bytes;
+ if (cur_size_in_bytes > 0xFFFFF)
+ cur_size_in_bytes = 0xFFFFF;
+ size_in_bytes -= cur_size_in_bytes;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
+ radeon_ring_write(ring, dst_offset & 0xffffffff);
+ radeon_ring_write(ring, src_offset & 0xffffffff);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_bytes;
+ dst_offset += cur_size_in_bytes;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
/*
* startup/shutdown callbacks
*/
@@ -3803,6 +4117,18 @@ static int si_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
+ r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = si_irq_init(rdev);
if (r) {
@@ -3833,6 +4159,22 @@ static int si_startup(struct radeon_device *rdev)
if (r)
return r;
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+ if (r)
+ return r;
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+ if (r)
+ return r;
+
r = si_cp_load_microcode(rdev);
if (r)
return r;
@@ -3840,6 +4182,10 @@ static int si_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = cayman_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3881,9 +4227,7 @@ int si_resume(struct radeon_device *rdev)
int si_suspend(struct radeon_device *rdev)
{
si_cp_enable(rdev, false);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
- rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
- rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+ cayman_dma_stop(rdev);
si_irq_suspend(rdev);
radeon_wb_disable(rdev);
si_pcie_gart_disable(rdev);
@@ -3961,6 +4305,14 @@ int si_init(struct radeon_device *rdev)
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -3973,6 +4325,7 @@ int si_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
si_cp_fini(rdev);
+ cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
@@ -4001,6 +4354,7 @@ void si_fini(struct radeon_device *rdev)
r600_blit_fini(rdev);
#endif
si_cp_fini(rdev);
+ cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 7d2a20e56577..c056aae814f0 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -62,6 +62,22 @@
#define SRBM_STATUS 0xE50
+#define SRBM_SOFT_RESET 0x0E60
+#define SOFT_RESET_BIF (1 << 1)
+#define SOFT_RESET_DC (1 << 5)
+#define SOFT_RESET_DMA1 (1 << 6)
+#define SOFT_RESET_GRBM (1 << 8)
+#define SOFT_RESET_HDP (1 << 9)
+#define SOFT_RESET_IH (1 << 10)
+#define SOFT_RESET_MC (1 << 11)
+#define SOFT_RESET_ROM (1 << 14)
+#define SOFT_RESET_SEM (1 << 15)
+#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
+#define SOFT_RESET_TST (1 << 21)
+#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_ORB (1 << 23)
+
#define CC_SYS_RB_BACKEND_DISABLE 0xe80
#define GC_USER_SYS_RB_BACKEND_DISABLE 0xe84
@@ -91,7 +107,18 @@
#define VM_CONTEXT0_CNTL 0x1410
#define ENABLE_CONTEXT (1 << 0)
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
+#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
+#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
+#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
+#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
+#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
+#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
+#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
+#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
#define VM_CONTEXT1_CNTL 0x1414
#define VM_CONTEXT0_CNTL2 0x1430
#define VM_CONTEXT1_CNTL2 0x1434
@@ -104,6 +131,9 @@
#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450
#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
+
#define VM_INVALIDATE_REQUEST 0x1478
#define VM_INVALIDATE_RESPONSE 0x147c
@@ -424,6 +454,7 @@
# define RDERR_INT_ENABLE (1 << 0)
# define GUI_IDLE_INT_ENABLE (1 << 19)
+#define CP_STRMOUT_CNTL 0x84FC
#define SCRATCH_REG0 0x8500
#define SCRATCH_REG1 0x8504
#define SCRATCH_REG2 0x8508
@@ -834,6 +865,54 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ * SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ */
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ * 2 - DATA
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
+# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_DEST_BASE_0_ENA (1 << 0)
@@ -921,4 +1000,63 @@
#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
#define PACKET3_SWITCH_BUFFER 0x8B
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_TILING_CONFIG 0xd0b8
+
+#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((b) & 0x1) << 26) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
+ (((vmid) & 0xF) << 20) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
+ (1 << 26) | \
+ (1 << 21) | \
+ (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
#endif