Diffstat (limited to 'drivers/gpu/drm/radeon')
56 files changed, 4057 insertions, 1943 deletions
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 3904d7964a4b..f4d4505fe831 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -259,7 +259,7 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) /* adjust pm to dpms changes BEFORE enabling crtcs */ radeon_pm_compute_clocks(rdev); /* disable crtc pair power gating before programming */ - if (ASIC_IS_DCE6(rdev)) + if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) atombios_powergate_crtc(crtc, ATOM_DISABLE); atombios_enable_crtc(crtc, ATOM_ENABLE); if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev)) @@ -279,7 +279,7 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) atombios_enable_crtc(crtc, ATOM_DISABLE); radeon_crtc->enabled = false; /* power gating is per-pair */ - if (ASIC_IS_DCE6(rdev)) { + if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) { struct drm_crtc *other_crtc; struct radeon_crtc *other_radeon_crtc; list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) { @@ -444,11 +444,28 @@ union atom_enable_ss { static void atombios_crtc_program_ss(struct radeon_device *rdev, int enable, int pll_id, + int crtc_id, struct radeon_atom_ss *ss) { + unsigned i; int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL); union atom_enable_ss args; + if (!enable) { + for (i = 0; i < rdev->num_crtc; i++) { + if (rdev->mode_info.crtcs[i] && + rdev->mode_info.crtcs[i]->enabled && + i != crtc_id && + pll_id == rdev->mode_info.crtcs[i]->pll_id) { + /* one other crtc is using this pll don't turn + * off spread spectrum as it might turn off + * display on active crtc + */ + return; + } + } + } + memset(&args, 0, sizeof(args)); if (ASIC_IS_DCE5(rdev)) { @@ -457,22 +474,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev, switch (pll_id) { case ATOM_PPLL1: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_PPLL2: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P2PLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_DCPLL: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_DCPLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(0); - args.v3.usSpreadSpectrumStep = cpu_to_le16(0); break; case ATOM_PPLL_INVALID: return; } + args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); + args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v3.ucEnable = enable; if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE61(rdev)) args.v3.ucEnable = ATOM_DISABLE; @@ -482,22 +495,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev, switch (pll_id) { case ATOM_PPLL1: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P1PLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_PPLL2: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P2PLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_DCPLL: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_DCPLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(0); - args.v2.usSpreadSpectrumStep = cpu_to_le16(0); break; case ATOM_PPLL_INVALID: return; } + args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); + 
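
The new crtc_id parameter above exists so atombios_crtc_program_ss() can refuse to disable spread spectrum on a PLL that another active CRTC still uses. A minimal sketch of that guard, with stand-in types (ss_crtc/ss_dev carry only the fields the check needs, not the kernel's struct radeon_crtc / struct radeon_device):

#include <stdbool.h>

/* stand-ins, not the kernel's real structures */
struct ss_crtc { bool enabled; int pll_id; };
struct ss_dev  { int num_crtc; struct ss_crtc *crtcs[6]; };

/* Safe to turn spread spectrum off on pll_id only if no *other*
 * enabled crtc is still clocked by the same PLL. */
static bool ss_disable_is_safe(const struct ss_dev *rdev, int pll_id, int crtc_id)
{
        int i;

        for (i = 0; i < rdev->num_crtc; i++) {
                if (rdev->crtcs[i] && rdev->crtcs[i]->enabled &&
                    i != crtc_id && rdev->crtcs[i]->pll_id == pll_id)
                        return false;   /* shared PLL still in use */
        }
        return true;
}

Callers programming the display PLL (DCPLL) pass crtc_id = -1 in the hunks below, so the check never matches their own CRTC.
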
args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v2.ucEnable = enable; if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE41(rdev)) args.v2.ucEnable = ATOM_DISABLE; @@ -1036,7 +1045,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); - atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, &ss); + atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, radeon_crtc->crtc_id, &ss); atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, encoder_mode, radeon_encoder->encoder_id, mode->clock, @@ -1059,7 +1068,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode ss.step = step_size; } - atombios_crtc_program_ss(rdev, ATOM_ENABLE, radeon_crtc->pll_id, &ss); + atombios_crtc_program_ss(rdev, ATOM_ENABLE, radeon_crtc->pll_id, radeon_crtc->crtc_id, &ss); } } @@ -1539,8 +1548,12 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) * crtc virtual pixel clock. */ if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { - if (ASIC_IS_DCE5(rdev) || rdev->clock.dp_extclk) + if (rdev->clock.dp_extclk) return ATOM_PPLL_INVALID; + else if (ASIC_IS_DCE6(rdev)) + return ATOM_PPLL0; + else if (ASIC_IS_DCE5(rdev)) + return ATOM_DCPLL; } } } @@ -1576,11 +1589,11 @@ void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev) ASIC_INTERNAL_SS_ON_DCPLL, rdev->clock.default_dispclk); if (ss_enabled) - atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, &ss); + atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, -1, &ss); /* XXX: DCE5, make sure voltage, dispclk is high enough */ atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk); if (ss_enabled) - atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, &ss); + atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, -1, &ss); } } @@ -1628,7 +1641,7 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, } static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) @@ -1639,18 +1652,28 @@ static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc, static void atombios_crtc_prepare(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + radeon_crtc->in_mode_set = true; /* pick pll */ radeon_crtc->pll_id = radeon_atom_pick_pll(crtc); + /* disable crtc pair power gating before programming */ + if (ASIC_IS_DCE6(rdev)) + atombios_powergate_crtc(crtc, ATOM_DISABLE); + atombios_lock_crtc(crtc, ATOM_ENABLE); atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); } static void atombios_crtc_commit(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + atombios_crtc_dpms(crtc, DRM_MODE_DPMS_ON); atombios_lock_crtc(crtc, ATOM_DISABLE); + radeon_crtc->in_mode_set = false; } static void atombios_crtc_disable(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 5131b3b0f7d2..7712cf5ab33b 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -22,6 +22,7 @@ * * Authors: Dave Airlie * Alex Deucher + * Jerome Glisse */ #include "drmP.h" #include "radeon_drm.h" @@ -608,7 +609,7 @@ int 
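
The in_mode_set flag introduced in atombios_crtc_prepare()/commit() above brackets a full mode set: power gating is disabled once up front, and the intermediate DPMS off/on transitions are kept from re-toggling the per-pair gating. A compressed sketch of the ordering, assuming simplified stand-in helpers rather than the driver's real signatures:

#include <stdbool.h>

struct mode_crtc { bool in_mode_set; };

static void powergate(struct mode_crtc *c, bool gate)
{
        (void)c; (void)gate;            /* ATOM table call elided */
}

static void crtc_dpms(struct mode_crtc *c, bool on)
{
        /* during a mode set, prepare() already handled gating */
        if (!c->in_mode_set)
                powergate(c, !on);
        /* enable/disable the crtc itself ... */
}

static void crtc_mode_set(struct mode_crtc *c)
{
        c->in_mode_set = true;
        powergate(c, false);            /* ungate the pair once, up front */
        crtc_dpms(c, false);            /* prepare */
        /* ... pick PLL, program timing and base ... */
        crtc_dpms(c, true);             /* commit */
        c->in_mode_set = false;
}
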
radeon_dp_get_panel_mode(struct drm_encoder *encoder, } void radeon_dp_set_link_config(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *dig_connector; @@ -654,7 +655,6 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS, link_status, DP_LINK_STATUS_SIZE, 100); if (ret <= 0) { - DRM_ERROR("displayport link status failed\n"); return false; } @@ -833,8 +833,10 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) else mdelay(dp_info->rd_interval * 4); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) + if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + DRM_ERROR("displayport link status failed\n"); break; + } if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { clock_recovery = true; @@ -896,8 +898,10 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) else mdelay(dp_info->rd_interval * 4); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) + if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + DRM_ERROR("displayport link status failed\n"); break; + } if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) { channel_eq = true; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 486ccdf4aacd..f9bc27fe269a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -58,7 +58,7 @@ static inline bool radeon_encoder_is_digital(struct drm_encoder *encoder) } static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -1392,10 +1392,18 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) case DRM_MODE_DPMS_ON: /* some early dce3.2 boards have a bug in their transmitter control table */ if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730) || - ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) + ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { + if (ASIC_IS_DCE6(rdev)) { + /* It seems we need to call ATOM_ENCODER_CMD_SETUP again + * before reenabling encoder on DPMS ON, otherwise we never + * get picture + */ + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); + } atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); - else + } else { atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); + } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { atombios_set_edp_panel_power(connector, @@ -2234,7 +2242,7 @@ radeon_atom_ext_dpms(struct drm_encoder *encoder, int mode) } static bool radeon_atom_ext_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { return true; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7fb3d2e0434c..e93b80a6d4e9 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -99,6 
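
In the atombios_dp.c hunks above, radeon_dp_get_link_status() no longer logs an error itself; the clock-recovery and channel-equalization training loops print it instead, so a failed AUX read during ordinary polling stays quiet while a failure mid-training is still reported. The pattern, sketched with a placeholder dp_get_link_status() standing in for the AUX read:

#include <stdbool.h>
#include <stdio.h>

extern bool dp_get_link_status(unsigned char status[6]);   /* silent on failure now */

static int train_clock_recovery(void)
{
        unsigned char status[6];
        int tries;

        for (tries = 0; tries < 5; tries++) {
                if (!dp_get_link_status(status)) {
                        /* only the trainer reports the failure */
                        fprintf(stderr, "displayport link status failed\n");
                        return -1;
                }
                /* ... test clock-recovery bits, bump drive/pre-emphasis ... */
        }
        return 0;
}
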
+99,14 @@ void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev) } } +/** + * dce4_wait_for_vblank - vblank wait asic callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to wait for vblank on + * + * Wait for vblank on the requested crtc (evergreen+). + */ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; @@ -118,18 +126,49 @@ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc) } } +/** + * radeon_irq_kms_pflip_irq_get - pre-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to prepare for pageflip on + * + * Pre-pageflip callback (evergreen+). + * Enables the pageflip irq (vblank irq). + */ void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ radeon_irq_kms_pflip_irq_get(rdev, crtc); } +/** + * evergreen_post_page_flip - pos-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to cleanup pageflip on + * + * Post-pageflip callback (evergreen+). + * Disables the pageflip irq (vblank irq). + */ void evergreen_post_page_flip(struct radeon_device *rdev, int crtc) { /* disable the pflip int */ radeon_irq_kms_pflip_irq_put(rdev, crtc); } +/** + * evergreen_page_flip - pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc_id: crtc to cleanup pageflip on + * @crtc_base: new address of the crtc (GPU MC address) + * + * Does the actual pageflip (evergreen+). + * During vblank we take the crtc lock and wait for the update_pending + * bit to go high, when it does, we release the lock, and allow the + * double buffered update to take place. + * Returns the current update pending status. + */ u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -214,6 +253,15 @@ int sumo_get_temp(struct radeon_device *rdev) return actual_temp * 1000; } +/** + * sumo_pm_init_profile - Initialize power profiles callback. + * + * @rdev: radeon_device pointer + * + * Initialize the power states used in profile mode + * (sumo, trinity, SI). + * Used for profile mode only. + */ void sumo_pm_init_profile(struct radeon_device *rdev) { int idx; @@ -265,6 +313,14 @@ void sumo_pm_init_profile(struct radeon_device *rdev) rdev->pm.power_state[idx].num_clock_modes - 1; } +/** + * evergreen_pm_misc - set additional pm hw parameters callback. + * + * @rdev: radeon_device pointer + * + * Set non-clock parameters associated with a power state + * (voltage, etc.) (evergreen+). + */ void evergreen_pm_misc(struct radeon_device *rdev) { int req_ps_idx = rdev->pm.requested_power_state_index; @@ -292,6 +348,13 @@ void evergreen_pm_misc(struct radeon_device *rdev) } } +/** + * evergreen_pm_prepare - pre-power state change callback. + * + * @rdev: radeon_device pointer + * + * Prepare for a power state change (evergreen+). + */ void evergreen_pm_prepare(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -310,6 +373,13 @@ void evergreen_pm_prepare(struct radeon_device *rdev) } } +/** + * evergreen_pm_finish - post-power state change callback. + * + * @rdev: radeon_device pointer + * + * Clean up after a power state change (evergreen+). + */ void evergreen_pm_finish(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -328,6 +398,15 @@ void evergreen_pm_finish(struct radeon_device *rdev) } } +/** + * evergreen_hpd_sense - hpd sense callback. 
+ * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Checks if a digital monitor is connected (evergreen+). + * Returns true if connected, false if not connected. + */ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) { bool connected = false; @@ -364,6 +443,14 @@ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) return connected; } +/** + * evergreen_hpd_set_polarity - hpd set polarity callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Set the polarity of the hpd pin (evergreen+). + */ void evergreen_hpd_set_polarity(struct radeon_device *rdev, enum radeon_hpd_id hpd) { @@ -424,10 +511,19 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, } } +/** + * evergreen_hpd_init - hpd setup callback. + * + * @rdev: radeon_device pointer + * + * Setup the hpd pins used by the card (evergreen+). + * Enable the pin, set the polarity, and enable the hpd interrupts. + */ void evergreen_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | DC_HPDx_RX_INT_TIMER(0xfa) | DC_HPDx_EN; @@ -436,73 +532,72 @@ void evergreen_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; } radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); + enabled |= 1 << radeon_connector->hpd.hpd; } - if (rdev->irq.installed) - evergreen_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enabled); } +/** + * evergreen_hpd_fini - hpd tear down callback. + * + * @rdev: radeon_device pointer + * + * Tear down the hpd pins used by the card (evergreen+). + * Disable the hpd interrupts. + */ void evergreen_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disabled = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } + disabled |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disabled); } /* watermark setup */ @@ -933,6 +1028,14 @@ static void evergreen_program_watermarks(struct radeon_device *rdev, } +/** + * evergreen_bandwidth_update - update display watermarks callback. + * + * @rdev: radeon_device pointer + * + * Update the display watermarks based on the requested mode(s) + * (evergreen+). 
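
The evergreen_hpd_init()/fini() rewrite above stops poking rdev->irq.hpd[] directly; each connector's pin is collected into a bitmask that is handed to radeon_irq_kms_enable_hpd()/disable_hpd() once, after the loop. The shape of that refactor, with a stand-in pin list and helper:

extern void irq_kms_enable_hpd(unsigned mask);   /* stand-in: takes the irq lock once */

static void hpd_init(const int *pins, int n)
{
        unsigned enabled = 0;
        int i;

        for (i = 0; i < n; i++) {
                /* per-pin HPDx_CONTROL register setup elided */
                enabled |= 1u << pins[i];
        }
        irq_kms_enable_hpd(enabled);     /* one locked irq update for all pins */
}
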
+ */ void evergreen_bandwidth_update(struct radeon_device *rdev) { struct drm_display_mode *mode0 = NULL; @@ -956,6 +1059,15 @@ void evergreen_bandwidth_update(struct radeon_device *rdev) } } +/** + * evergreen_mc_wait_for_idle - wait for MC idle callback. + * + * @rdev: radeon_device pointer + * + * Wait for the MC (memory controller) to be idle. + * (evergreen+). + * Returns 0 if the MC is idle, -1 if not. + */ int evergreen_mc_wait_for_idle(struct radeon_device *rdev) { unsigned i; @@ -1117,24 +1229,8 @@ void evergreen_agp_enable(struct radeon_device *rdev) void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) { - save->vga_control[0] = RREG32(D1VGA_CONTROL); - save->vga_control[1] = RREG32(D2VGA_CONTROL); save->vga_render_control = RREG32(VGA_RENDER_CONTROL); save->vga_hdp_control = RREG32(VGA_HDP_CONTROL); - save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET); - save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET); - if (rdev->num_crtc >= 4) { - save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL); - save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL); - save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET); - save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET); - } - if (rdev->num_crtc >= 6) { - save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL); - save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL); - save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET); - save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); - } /* Stop all video */ WREG32(VGA_RENDER_CONTROL, 0); @@ -1245,47 +1341,6 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s /* Unlock host access */ WREG32(VGA_HDP_CONTROL, save->vga_hdp_control); mdelay(1); - /* Restore video state */ - WREG32(D1VGA_CONTROL, save->vga_control[0]); - WREG32(D2VGA_CONTROL, save->vga_control[1]); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]); - WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]); - WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]); - } - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); - } - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]); - } - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + 
EVERGREEN_CRTC0_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); - } WREG32(VGA_RENDER_CONTROL, save->vga_render_control); } @@ -1371,12 +1426,28 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 next_rptr; /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); - /* FIXME: implement */ + + if (ring->rptr_save_reg) { + next_rptr = ring->wptr + 3 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_START) >> 2)); + radeon_ring_write(ring, next_rptr); + } else if (rdev->wb.enabled) { + next_rptr = ring->wptr + 5 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18)); + radeon_ring_write(ring, next_rptr); + radeon_ring_write(ring, 0); + } + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -1858,10 +1929,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) rdev->config.evergreen.tile_config |= 1 << 4; else { - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.evergreen.tile_config |= 1 << 4; - else + switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { + case 0: /* four banks */ rdev->config.evergreen.tile_config |= 0 << 4; + break; + case 1: /* eight banks */ + rdev->config.evergreen.tile_config |= 1 << 4; + break; + case 2: /* sixteen banks */ + default: + rdev->config.evergreen.tile_config |= 2 << 4; + break; + } } rdev->config.evergreen.tile_config |= 0 << 8; rdev->config.evergreen.tile_config |= @@ -2188,6 +2267,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); evergreen_mc_stop(rdev, &save); if (evergreen_mc_wait_for_idle(rdev)) { dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); @@ -2225,6 +2312,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); 
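
In the evergreen_ring_ib_execute() hunk above, the ring now publishes where the read pointer will be after the IB packet, either into a scratch register (SET_CONFIG_REG) or into write-back memory (MEM_WRITE). My reading of the "+ 3 + 4" and "+ 5 + 4" offsets, sketched as plain arithmetic (packet sizes inferred from the hunk, in dwords):

#include <stdbool.h>

/* Predicted read pointer = current write pointer plus the dwords still
 * to be emitted: a 3-dword SET_CONFIG_REG packet (or a 5-dword
 * MEM_WRITE) followed by the 4-dword INDIRECT_BUFFER packet. */
static unsigned predict_next_rptr(unsigned wptr, bool have_scratch_reg)
{
        unsigned update_pkt = have_scratch_reg ? 3 : 5;
        unsigned ib_pkt = 4;

        return wptr + update_pkt + ib_pkt;
}

The cayman version further down uses "+ 3 + 4 + 8" because its IB emit trails eight more dwords after the pointer-update packet.
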
evergreen_mc_resume(rdev, &save); return 0; } @@ -2348,20 +2443,20 @@ int evergreen_irq_set(struct radeon_device *rdev) if (rdev->family >= CHIP_CAYMAN) { /* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } } else { - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -2369,32 +2464,32 @@ int evergreen_irq_set(struct radeon_device *rdev) } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2676,7 +2771,6 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -2684,22 +2778,21 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; wptr = evergreen_get_ih_wptr(rdev); + +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) + return IRQ_NONE; + rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - spin_lock_irqsave(&rdev->ih.lock, flags); - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } -restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ evergreen_irq_ack(rdev); - rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! 
*/ ring_index = rptr / 4; @@ -2716,7 +2809,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -2742,7 +2835,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -2768,7 +2861,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -2794,7 +2887,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -2820,7 +2913,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -2846,7 +2939,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); @@ -2986,7 +3079,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -2998,17 +3090,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = evergreen_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = evergreen_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; } @@ -3096,13 +3190,11 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) { @@ -3146,9 +3238,6 @@ int evergreen_suspend(struct radeon_device *rdev) struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r600_audio_fini(rdev); - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - r600_blit_suspend(rdev); r700_cp_stop(rdev); ring->ready = false; evergreen_irq_suspend(rdev); @@ -3234,20 +3323,14 @@ int evergreen_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - 
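
The evergreen_irq_process() rework above swaps the IH spinlock for an atomic re-entry guard: atomic_xchg() takes the lock, and after releasing it the write pointer is sampled again, restarting processing if new entries arrived in the window. A self-contained skeleton, using C11 atomics to stand in for the kernel's atomic_t and extern placeholders for the MMIO accessors:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int ih_lock;
static unsigned rptr;                           /* persistent ring read pointer */

extern unsigned read_hw_wptr(void);             /* placeholder MMIO accessors */
extern void write_hw_rptr(unsigned v);
extern void decode_one_entry(unsigned rptr);

static bool irq_process(void)
{
        unsigned wptr = read_hw_wptr();

restart:
        /* re-entry guard: whoever wins the exchange drains the ring */
        if (atomic_exchange(&ih_lock, 1))
                return false;                   /* IRQ_NONE */
        while (rptr != wptr) {
                decode_one_entry(rptr);
                rptr = (rptr + 16) & 0x3fff;    /* 16-byte entries; mask stands in for ih.ptr_mask */
        }
        write_hw_rptr(rptr);
        atomic_store(&ih_lock, 0);

        /* entries may have landed while we were draining: sample again */
        wptr = read_hw_wptr();
        if (wptr != rptr)
                goto restart;
        return true;                            /* IRQ_HANDLED */
}
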
rdev->accel_working = false; - } - r = evergreen_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -3274,7 +3357,7 @@ void evergreen_fini(struct radeon_device *rdev) r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -3289,7 +3372,8 @@ void evergreen_fini(struct radeon_device *rdev) void evergreen_pcie_gen2_enable(struct radeon_device *rdev) { - u32 link_width_cntl, speed_cntl; + u32 link_width_cntl, speed_cntl, mask; + int ret; if (radeon_pcie_gen2 == 0) return; @@ -3304,6 +3388,15 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev) if (ASIC_IS_X2(rdev)) return; + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; + + if (!(mask & DRM_PCIE_SPEED_50)) + return; + + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) || (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) { diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 1e96bd458cfd..89cb9feb5653 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ @@ -633,10 +634,6 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.max_dim = 16384; - /* pin copy shader into vram if already initialized */ - if (rdev->r600_blit.shader_obj) - goto done; - rdev->r600_blit.state_offset = 0; if (rdev->family < CHIP_CAYMAN) @@ -667,11 +664,26 @@ int evergreen_blit_init(struct radeon_device *rdev) obj_size += cayman_ps_size * 4; obj_size = ALIGN(obj_size, 256); - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("evergreen failed to allocate shader\n"); - return r; + /* pin copy shader into vram if not already initialized */ + if (!rdev->r600_blit.shader_obj) { + r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, + NULL, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("evergreen failed to allocate shader\n"); + return r; + } + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } } DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", @@ -713,17 +725,6 @@ int evergreen_blit_init(struct radeon_device *rdev) 
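
evergreen_pcie_gen2_enable() above now bails out unless drm_pcie_get_speed_cap_mask() reports 5.0 GT/s support, so the link is only retrained when both ends can actually do gen2. The shape of the check, with a stand-in helper and flag (the real names are drm_pcie_get_speed_cap_mask() and DRM_PCIE_SPEED_50):

#define SPEED_50 (1u << 1)                     /* stand-in for DRM_PCIE_SPEED_50 */
extern int get_speed_cap_mask(unsigned *mask); /* stand-in for the DRM helper */

static void pcie_gen2_enable(void)
{
        unsigned mask;

        if (get_speed_cap_mask(&mask) != 0)
                return;                 /* can't query the root port: leave link alone */
        if (!(mask & SPEED_50))
                return;                 /* one side is gen1-only */
        /* ... program LC_SPEED_CNTL to retrain the link at gen2 ... */
}
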
radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); -done: - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index c16554122ccd..e44a62a07fe3 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -788,6 +788,13 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, case V_030000_SQ_TEX_DIM_1D_ARRAY: case V_030000_SQ_TEX_DIM_2D_ARRAY: depth = 1; + break; + case V_030000_SQ_TEX_DIM_2D_MSAA: + case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA: + surf.nsamples = 1 << llevel; + llevel = 0; + depth = 1; + break; case V_030000_SQ_TEX_DIM_3D: break; default: @@ -961,13 +968,15 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) if (track->db_dirty) { /* Check stencil buffer */ - if (G_028800_STENCIL_ENABLE(track->db_depth_control)) { + if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID && + G_028800_STENCIL_ENABLE(track->db_depth_control)) { r = evergreen_cs_track_validate_stencil(p); if (r) return r; } /* Check depth buffer */ - if (G_028800_Z_ENABLE(track->db_depth_control)) { + if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID && + G_028800_Z_ENABLE(track->db_depth_control)) { r = evergreen_cs_track_validate_depth(p); if (r) return r; diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index b50b15c70498..79347855d9bf 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -88,6 +88,10 @@ #define CONFIG_MEMSIZE 0x5428 #define CP_COHER_BASE 0x85F8 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C +#define CP_STAT 0x8680 #define CP_ME_CNTL 0x86D8 #define CP_ME_HALT (1 << 28) #define CP_PFP_HALT (1 << 26) @@ -1273,6 +1277,8 @@ #define S_028044_FORMAT(x) (((x) & 0x1) << 0) #define G_028044_FORMAT(x) (((x) >> 0) & 0x1) #define C_028044_FORMAT 0xFFFFFFFE +#define V_028044_STENCIL_INVALID 0 +#define V_028044_STENCIL_8 1 #define G_028044_TILE_SPLIT(x) (((x) >> 8) & 0x7) #define DB_Z_READ_BASE 0x28048 #define DB_STENCIL_READ_BASE 0x2804c diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b7bf18e40215..853800e8582f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -574,10 +574,18 @@ static void cayman_gpu_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) rdev->config.cayman.tile_config |= 1 << 4; else { - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.cayman.tile_config |= 1 << 4; - else + switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { + case 0: /* four banks */ rdev->config.cayman.tile_config |= 0 << 4; + break; + case 1: /* eight banks */ + rdev->config.cayman.tile_config |= 1 << 4; + break; + case 2: /* sixteen banks */ + default: + rdev->config.cayman.tile_config |= 2 << 4; + break; + } } rdev->config.cayman.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; @@ -850,11 +858,20 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void 
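
Both evergreen_gpu_init() (earlier in this series) and cayman_gpu_init() above replace a boolean bank test with a full switch over the NOOFBANK field, so sixteen-bank boards encode 2 in bits 5:4 of tile_config instead of being misreported as eight banks. The encoding in isolation:

/* Map MC_ARB_RAMCFG's NOOFBANK field (0/1/2 = 4/8/16 banks)
 * into bits 5:4 of tile_config. */
static unsigned tile_config_banks(unsigned noofbank)
{
        switch (noofbank) {
        case 0:  return 0u << 4;        /* four banks */
        case 1:  return 1u << 4;        /* eight banks */
        case 2:                         /* sixteen banks */
        default: return 2u << 4;
        }
}
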
cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); + + if (ring->rptr_save_reg) { + uint32_t next_rptr = ring->wptr + 3 + 4 + 8; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_START) >> 2)); + radeon_ring_write(ring, next_rptr); + } + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -981,16 +998,41 @@ static int cayman_cp_start(struct radeon_device *rdev) static void cayman_cp_fini(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; cayman_cp_enable(rdev, false); - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + radeon_ring_fini(rdev, ring); + radeon_scratch_free(rdev, ring->rptr_save_reg); } int cayman_cp_resume(struct radeon_device *rdev) { + static const int ridx[] = { + RADEON_RING_TYPE_GFX_INDEX, + CAYMAN_RING_TYPE_CP1_INDEX, + CAYMAN_RING_TYPE_CP2_INDEX + }; + static const unsigned cp_rb_cntl[] = { + CP_RB0_CNTL, + CP_RB1_CNTL, + CP_RB2_CNTL, + }; + static const unsigned cp_rb_rptr_addr[] = { + CP_RB0_RPTR_ADDR, + CP_RB1_RPTR_ADDR, + CP_RB2_RPTR_ADDR + }; + static const unsigned cp_rb_rptr_addr_hi[] = { + CP_RB0_RPTR_ADDR_HI, + CP_RB1_RPTR_ADDR_HI, + CP_RB2_RPTR_ADDR_HI + }; + static const unsigned cp_rb_base[] = { + CP_RB0_BASE, + CP_RB1_BASE, + CP_RB2_BASE + }; struct radeon_ring *ring; - u32 tmp; - u32 rb_bufsz; - int r; + int i, r; /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */ WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP | @@ -1012,91 +1054,47 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(CP_DEBUG, (1 << 27)); - /* ring 0 - compute and gfx */ - /* Set ring buffer size */ - ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; -#ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; -#endif - WREG32(CP_RB0_CNTL, tmp); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB0_WPTR, ring->wptr); - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + WREG32(SCRATCH_UMSK, 0xff); - if (rdev->wb.enabled) - WREG32(SCRATCH_UMSK, 0xff); - else { - tmp |= RB_NO_UPDATE; - WREG32(SCRATCH_UMSK, 0); - } - - mdelay(1); - WREG32(CP_RB0_CNTL, tmp); - - WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); - - ring->rptr = RREG32(CP_RB0_RPTR); - - /* ring1 - compute only */ - /* Set ring buffer size */ - ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; -#ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; -#endif - WREG32(CP_RB1_CNTL, tmp); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB1_WPTR, ring->wptr); - - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB1_RPTR_ADDR, 
(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF); - - mdelay(1); - WREG32(CP_RB1_CNTL, tmp); - - WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); + for (i = 0; i < 3; ++i) { + uint32_t rb_cntl; + uint64_t addr; - ring->rptr = RREG32(CP_RB1_RPTR); - - /* ring2 - compute only */ - /* Set ring buffer size */ - ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + /* Set ring buffer size */ + ring = &rdev->ring[ridx[i]]; + rb_cntl = drm_order(ring->ring_size / 8); + rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8; #ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; + rb_cntl |= BUF_SWAP_32BIT; #endif - WREG32(CP_RB2_CNTL, tmp); + WREG32(cp_rb_cntl[i], rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB2_WPTR, ring->wptr); + /* set the wb address wether it's enabled or not */ + addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET; + WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC); + WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF); + } - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF); + /* set the rb base addr, this causes an internal reset of ALL rings */ + for (i = 0; i < 3; ++i) { + ring = &rdev->ring[ridx[i]]; + WREG32(cp_rb_base[i], ring->gpu_addr >> 8); + } - mdelay(1); - WREG32(CP_RB2_CNTL, tmp); + for (i = 0; i < 3; ++i) { + /* Initialize the ring buffer's read and write pointers */ + ring = &rdev->ring[ridx[i]]; + WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); - WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); + ring->rptr = ring->wptr = 0; + WREG32(ring->rptr_reg, ring->rptr); + WREG32(ring->wptr_reg, ring->wptr); - ring->rptr = RREG32(CP_RB2_RPTR); + mdelay(1); + WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA); + } /* start the rings */ cayman_cp_start(rdev); @@ -1132,6 +1130,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n", RREG32(0x14F8)); dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1180,6 +1186,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); evergreen_mc_resume(rdev, &save); return 0; } @@ -1291,17 +1305,17 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; - r = 
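
cayman_cp_resume() above collapses three near-identical ring-buffer bring-ups into loops over static const register tables indexed by ring; the base-address writes get their own pass because, per the hunk's comment, writing the RB base causes an internal reset of all rings. A reduced sketch of the table layout (the register offsets here are hypothetical, not the real CP_RBn_* values):

enum { GFX, CP1, CP2, NUM_RINGS };

static const unsigned cp_rb_cntl[NUM_RINGS] = { 0xC104, 0xC204, 0xC304 };
static const unsigned cp_rb_base[NUM_RINGS] = { 0xC100, 0xC200, 0xC300 };

extern void wreg32(unsigned reg, unsigned val);

static void cp_resume(const unsigned rb_cntl_val[NUM_RINGS],
                      const unsigned long long gpu_addr[NUM_RINGS])
{
        int i;

        for (i = 0; i < NUM_RINGS; i++)         /* pass 1: size/control */
                wreg32(cp_rb_cntl[i], rb_cntl_val[i]);
        for (i = 0; i < NUM_RINGS; i++)         /* pass 2: bases (resets all rings) */
                wreg32(cp_rb_base[i], (unsigned)(gpu_addr[i] >> 8));
        /* pass 3 would zero rptr/wptr under RB_RPTR_WR_ENA, then clear it */
}
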
radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } - r = radeon_vm_manager_start(rdev); - if (r) + r = radeon_vm_manager_init(rdev); + if (r) { + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) @@ -1334,10 +1348,6 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - radeon_vm_manager_suspend(rdev); - r600_blit_suspend(rdev); cayman_cp_enable(rdev, false); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; evergreen_irq_suspend(rdev); @@ -1413,17 +1423,7 @@ int cayman_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = radeon_vm_manager_init(rdev); - if (r) { - dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); - } - r = cayman_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); @@ -1432,7 +1432,7 @@ int cayman_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); @@ -1463,7 +1463,7 @@ void cayman_fini(struct radeon_device *rdev) si_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index a0b98066e207..870db340d377 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -236,6 +236,10 @@ #define CP_SEM_WAIT_TIMER 0x85BC #define CP_SEM_INCOMPLETE_TIMER_CNTL 0x85C8 #define CP_COHER_CNTL2 0x85E8 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C +#define CP_STAT 0x8680 #define CP_ME_CNTL 0x86D8 #define CP_ME_HALT (1 << 28) #define CP_PFP_HALT (1 << 26) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fb44e7e49083..8acb34fd3fd5 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -65,6 +65,19 @@ MODULE_FIRMWARE(FIRMWARE_R520); #include "r100_track.h" +/* This files gather functions specifics to: + * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 + * and others in some cases. + */ + +/** + * r100_wait_for_vblank - vblank wait asic callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to wait for vblank on + * + * Wait for vblank on the requested crtc (r1xx-r4xx). + */ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; @@ -99,128 +112,49 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) } } -/* This files gather functions specifics to: - * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 +/** + * r100_pre_page_flip - pre-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to prepare for pageflip on + * + * Pre-pageflip callback (r1xx-r4xx). + * Enables the pageflip irq (vblank irq). 
*/ - -int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; - } else - p->ib.ptr[idx] = (value & 0xffc00000) | tmp; - return 0; -} - -int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib.ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 0x7F; - } - return r; -} - void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ radeon_irq_kms_pflip_irq_get(rdev, crtc); } +/** + * r100_post_page_flip - pos-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to cleanup pageflip on + * + * Post-pageflip callback (r1xx-r4xx). + * Disables the pageflip irq (vblank irq). + */ void r100_post_page_flip(struct radeon_device *rdev, int crtc) { /* disable the pflip int */ radeon_irq_kms_pflip_irq_put(rdev, crtc); } +/** + * r100_page_flip - pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc_id: crtc to cleanup pageflip on + * @crtc_base: new address of the crtc (GPU MC address) + * + * Does the actual pageflip (r1xx-r4xx). 
+ * During vblank we take the crtc lock and wait for the update_pending + * bit to go high, when it does, we release the lock, and allow the + * double buffered update to take place. + * Returns the current update pending status. + */ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -247,6 +181,15 @@ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET; } +/** + * r100_pm_get_dynpm_state - look up dynpm power state callback. + * + * @rdev: radeon_device pointer + * + * Look up the optimal power state based on the + * current state of the GPU (r1xx-r5xx). + * Used for dynpm only. + */ void r100_pm_get_dynpm_state(struct radeon_device *rdev) { int i; @@ -329,6 +272,15 @@ void r100_pm_get_dynpm_state(struct radeon_device *rdev) pcie_lanes); } +/** + * r100_pm_init_profile - Initialize power profiles callback. + * + * @rdev: radeon_device pointer + * + * Initialize the power states used in profile mode + * (r1xx-r3xx). + * Used for profile mode only. + */ void r100_pm_init_profile(struct radeon_device *rdev) { /* default */ @@ -368,6 +320,14 @@ void r100_pm_init_profile(struct radeon_device *rdev) rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0; } +/** + * r100_pm_misc - set additional pm hw parameters callback. + * + * @rdev: radeon_device pointer + * + * Set non-clock parameters associated with a power state + * (voltage, pcie lanes, etc.) (r1xx-r4xx). + */ void r100_pm_misc(struct radeon_device *rdev) { int requested_index = rdev->pm.requested_power_state_index; @@ -459,6 +419,13 @@ void r100_pm_misc(struct radeon_device *rdev) } } +/** + * r100_pm_prepare - pre-power state change callback. + * + * @rdev: radeon_device pointer + * + * Prepare for a power state change (r1xx-r4xx). + */ void r100_pm_prepare(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -483,6 +450,13 @@ void r100_pm_prepare(struct radeon_device *rdev) } } +/** + * r100_pm_finish - post-power state change callback. + * + * @rdev: radeon_device pointer + * + * Clean up after a power state change (r1xx-r4xx). + */ void r100_pm_finish(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -507,6 +481,14 @@ void r100_pm_finish(struct radeon_device *rdev) } } +/** + * r100_gui_idle - gui idle callback. + * + * @rdev: radeon_device pointer + * + * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx). + * Returns true if idle, false if not. + */ bool r100_gui_idle(struct radeon_device *rdev) { if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE) @@ -516,6 +498,15 @@ bool r100_gui_idle(struct radeon_device *rdev) } /* hpd for digital panel detect/disconnect */ +/** + * r100_hpd_sense - hpd sense callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Checks if a digital monitor is connected (r1xx-r4xx). + * Returns true if connected, false if not connected. + */ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) { bool connected = false; @@ -535,6 +526,14 @@ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) return connected; } +/** + * r100_hpd_set_polarity - hpd set polarity callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Set the polarity of the hpd pin (r1xx-r4xx). 
+ */ void r100_hpd_set_polarity(struct radeon_device *rdev, enum radeon_hpd_id hpd) { @@ -563,47 +562,47 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, } } +/** + * r100_hpd_init - hpd setup callback. + * + * @rdev: radeon_device pointer + * + * Setup the hpd pins used by the card (r1xx-r4xx). + * Set the polarity, and enable the hpd interrupts. + */ void r100_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = true; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = true; - break; - default: - break; - } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r100_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); } +/** + * r100_hpd_fini - hpd tear down callback. + * + * @rdev: radeon_device pointer + * + * Tear down the hpd pins used by the card (r1xx-r4xx). + * Disable the hpd interrupts. + */ void r100_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = false; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = false; - break; - default: - break; - } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ -635,15 +634,6 @@ int r100_pci_gart_init(struct radeon_device *rdev) return radeon_gart_table_ram_alloc(rdev); } -/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ -void r100_enable_bm(struct radeon_device *rdev) -{ - uint32_t tmp; - /* Enable bus mastering */ - tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; - WREG32(RADEON_BUS_CNTL, tmp); -} - int r100_pci_gart_enable(struct radeon_device *rdev) { uint32_t tmp; @@ -705,18 +695,18 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { tmp |= RADEON_CRTC_VBLANK_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -782,7 +772,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ @@ -792,7 +781,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { @@ -801,7 +790,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if 
(atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { @@ -883,7 +872,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -947,7 +936,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; @@ -1192,6 +1181,14 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) } ring->ready = true; radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + if (radeon_ring_supports_scratch_reg(rdev, ring)) { + r = radeon_scratch_get(rdev, &ring->rptr_save_reg); + if (r) { + DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); + ring->rptr_save_reg = 0; + } + } return 0; } @@ -1202,6 +1199,7 @@ void r100_cp_fini(struct radeon_device *rdev) } /* Disable ring */ r100_cp_disable(rdev); + radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg); radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); DRM_INFO("radeon: cp finalized\n"); } @@ -1223,6 +1221,112 @@ void r100_cp_disable(struct radeon_device *rdev) /* * CS functions */ +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; + } else + p->ib.ptr[idx] = (value & 0xffc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib.ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No 
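/* [Editor's note] The copy callbacks above change from taking a fence to
 * taking a pointer-to-pointer: radeon_fence_emit() now allocates the fence
 * for the ring it is given, so the caller receives a new object through the
 * out-parameter. Mock restatement (types invented for illustration): */
#include <stdlib.h>

struct mock_fence { int ring; };

static int mock_fence_emit(struct mock_fence **fence, int ring)
{
        *fence = malloc(sizeof(**fence));   /* callee creates the fence */
        if (!*fence)
                return -1;
        (*fence)->ring = ring;
        return 0;
}

static int mock_copy_blit(struct mock_fence **fence)
{
        /* ...emit blit packets here... */
        return fence ? mock_fence_emit(fence, 0) : 0;
}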
reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + int r100_cs_parse_packet0(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, const unsigned *auth, unsigned n, @@ -2048,6 +2152,379 @@ int r100_cs_parse(struct radeon_cs_parser *p) return 0; } +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +{ + DRM_ERROR("pitch %d\n", t->pitch); + DRM_ERROR("use_pitch %d\n", t->use_pitch); + DRM_ERROR("width %d\n", t->width); + DRM_ERROR("width_11 %d\n", t->width_11); + DRM_ERROR("height %d\n", t->height); + DRM_ERROR("height_11 %d\n", t->height_11); + DRM_ERROR("num levels %d\n", t->num_levels); + DRM_ERROR("depth %d\n", t->txdepth); + DRM_ERROR("bpp %d\n", t->cpp); + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); + DRM_ERROR("compress format %d\n", t->compress_format); +} + +static int r100_track_compress_size(int compress_format, int w, int h) +{ + int block_width, block_height, block_bytes; + int wblocks, hblocks; + int min_wblocks; + int sz; + + block_width = 4; + block_height = 4; + + switch (compress_format) { + case R100_TRACK_COMP_DXT1: + block_bytes = 8; + min_wblocks = 4; + break; + default: + case R100_TRACK_COMP_DXT35: + block_bytes = 16; + min_wblocks = 2; + break; + } + + hblocks = (h + block_height - 1) / block_height; + wblocks = (w + block_width - 1) / block_width; + if (wblocks < min_wblocks) + wblocks = min_wblocks; + sz = wblocks * hblocks * block_bytes; + return sz; +} + +static int r100_cs_track_cube(struct radeon_device *rdev, + struct r100_cs_track *track, unsigned idx) +{ + unsigned face, w, h; + struct radeon_bo *cube_robj; + unsigned long size; + unsigned compress_format = track->textures[idx].compress_format; + + for (face = 0; face < 5; face++) { + cube_robj = track->textures[idx].cube_info[face].robj; + w = track->textures[idx].cube_info[face].width; + h = track->textures[idx].cube_info[face].height; + + if (compress_format) { + size = r100_track_compress_size(compress_format, w, h); + } else + size = w * h; + size *= track->textures[idx].cpp; + + size += track->textures[idx].cube_info[face].offset; + + if (size > radeon_bo_size(cube_robj)) { + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); + r100_cs_track_texture_print(&track->textures[idx]); + return -1; + } + } + return 0; +} + +static int r100_cs_track_texture_check(struct radeon_device *rdev, + struct r100_cs_track *track) +{ + struct radeon_bo *robj; + unsigned long size; + unsigned u, i, w, h, d; + int ret; + + for (u = 0; u < track->num_texture; u++) { + if (!track->textures[u].enabled) + continue; + if (track->textures[u].lookup_disable) + continue; + robj = track->textures[u].robj; + if (robj == NULL) { + DRM_ERROR("No 
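/* [Editor's worked example] The block math used by
 * r100_track_compress_size() above: DXT stores 4x4 texel blocks, 8 bytes
 * per DXT1 block and 16 bytes per DXT3/5 block, with a minimum row width
 * measured in blocks. */
static int dxt_size(int is_dxt1, int w, int h)
{
        int block_bytes = is_dxt1 ? 8 : 16;
        int min_wblocks = is_dxt1 ? 4 : 2;
        int wblocks = (w + 3) / 4;          /* round up to whole blocks */
        int hblocks = (h + 3) / 4;

        if (wblocks < min_wblocks)
                wblocks = min_wblocks;
        return wblocks * hblocks * block_bytes;
}
/* e.g. a 256x128 DXT1 level: (256/4) * (128/4) * 8 = 64 * 32 * 8 = 16384 bytes */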
texture bound to unit %u\n", u); + return -EINVAL; + } + size = 0; + for (i = 0; i <= track->textures[u].num_levels; i++) { + if (track->textures[u].use_pitch) { + if (rdev->family < CHIP_R300) + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); + else + w = track->textures[u].pitch / (1 << i); + } else { + w = track->textures[u].width; + if (rdev->family >= CHIP_RV515) + w |= track->textures[u].width_11; + w = w / (1 << i); + if (track->textures[u].roundup_w) + w = roundup_pow_of_two(w); + } + h = track->textures[u].height; + if (rdev->family >= CHIP_RV515) + h |= track->textures[u].height_11; + h = h / (1 << i); + if (track->textures[u].roundup_h) + h = roundup_pow_of_two(h); + if (track->textures[u].tex_coord_type == 1) { + d = (1 << track->textures[u].txdepth) / (1 << i); + if (!d) + d = 1; + } else { + d = 1; + } + if (track->textures[u].compress_format) { + + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; + /* compressed textures are block based */ + } else + size += w * h * d; + } + size *= track->textures[u].cpp; + + switch (track->textures[u].tex_coord_type) { + case 0: + case 1: + break; + case 2: + if (track->separate_cube) { + ret = r100_cs_track_cube(rdev, track, u); + if (ret) + return ret; + } else + size *= 6; + break; + default: + DRM_ERROR("Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); + return -EINVAL; + } + if (size > radeon_bo_size(robj)) { + DRM_ERROR("Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); + r100_cs_track_texture_print(&track->textures[u]); + return -EINVAL; + } + } + return 0; +} + +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i; + unsigned long size; + unsigned prim_walk; + unsigned nverts; + unsigned num_cb = track->cb_dirty ? track->num_cb : 0; + + if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && + !track->blend_read_enable) + num_cb = 0; + + for (i = 0; i < num_cb; i++) { + if (track->cb[i].robj == NULL) { + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + return -EINVAL; + } + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; + size += track->cb[i].offset; + if (size > radeon_bo_size(track->cb[i].robj)) { + DRM_ERROR("[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); + return -EINVAL; + } + } + track->cb_dirty = false; + + if (track->zb_dirty && track->z_enabled) { + if (track->zb.robj == NULL) { + DRM_ERROR("[drm] No buffer for z buffer !\n"); + return -EINVAL; + } + size = track->zb.pitch * track->zb.cpp * track->maxy; + size += track->zb.offset; + if (size > radeon_bo_size(track->zb.robj)) { + DRM_ERROR("[drm] Buffer too small for z buffer " + "(need %lu have %lu) !\n", size, + radeon_bo_size(track->zb.robj)); + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); + return -EINVAL; + } + } + track->zb_dirty = false; + + if (track->aa_dirty && track->aaresolve) { + if (track->aa.robj == NULL) { + DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + return -EINVAL; + } + /* I believe the format comes from colorbuffer0. 
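/* [Editor's restatement] Every buffer check in r100_cs_track_check() above
 * reduces to the same bound: the worst-case bytes the GPU can touch (pitch
 * times bytes-per-pixel times height, plus the start offset) must fit
 * inside the backing object. */
#include <stdbool.h>

static bool surface_fits(unsigned long pitch, unsigned long cpp,
                         unsigned long maxy, unsigned long offset,
                         unsigned long bo_size)
{
        unsigned long need = pitch * cpp * maxy + offset;

        return need <= bo_size;  /* e.g. 1024 * 4 * 768 + 0 = 3145728 bytes */
}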
*/ + size = track->aa.pitch * track->cb[0].cpp * track->maxy; + size += track->aa.offset; + if (size > radeon_bo_size(track->aa.robj)) { + DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); + return -EINVAL; + } + } + track->aa_dirty = false; + + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; + if (track->vap_vf_cntl & (1 << 14)) { + nverts = track->vap_alt_nverts; + } else { + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + } + switch (prim_walk) { + case 1: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * track->max_indx * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + DRM_ERROR("Max indices %u\n", track->max_indx); + return -EINVAL; + } + } + break; + case 2: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * (nverts - 1) * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + return -EINVAL; + } + } + break; + case 3: + size = track->vtx_size * nverts; + if (size != track->immd_dwords) { + DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", + track->immd_dwords, size); + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); + return -EINVAL; + } + break; + default: + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); + return -EINVAL; + } + + if (track->tex_dirty) { + track->tex_dirty = false; + return r100_cs_track_texture_check(rdev, track); + } + return 0; +} + +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i, face; + + track->cb_dirty = true; + track->zb_dirty = true; + track->tex_dirty = true; + track->aa_dirty = true; + + if (rdev->family < CHIP_R300) { + track->num_cb = 1; + if (rdev->family <= CHIP_RS200) + track->num_texture = 3; + else + track->num_texture = 6; + track->maxy = 2048; + track->separate_cube = 1; + } else { + track->num_cb = 4; + track->num_texture = 16; + track->maxy = 4096; + track->separate_cube = 0; + track->aaresolve = false; + track->aa.robj = NULL; + } + + for (i = 0; i < track->num_cb; i++) { + track->cb[i].robj = NULL; + track->cb[i].pitch = 8192; + track->cb[i].cpp = 16; + track->cb[i].offset = 0; + } + track->z_enabled = true; + track->zb.robj = NULL; + track->zb.pitch = 8192; + track->zb.cpp = 4; + track->zb.offset = 0; + track->vtx_size = 0x7F; + track->immd_dwords = 0xFFFFFFFFUL; + track->num_arrays = 11; + track->max_indx = 0x00FFFFFFUL; + for (i = 0; i < track->num_arrays; i++) { + track->arrays[i].robj = NULL; + track->arrays[i].esize = 0x7F; + } + for (i = 0; i < track->num_texture; i++) { + track->textures[i].compress_format = R100_TRACK_COMP_NONE; + track->textures[i].pitch = 16536; + track->textures[i].width = 16536; + track->textures[i].height = 16536; + 
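/* [Editor's sketch; bit positions taken from the prim_walk checks above]
 * How the tracker decodes VAP_VF_CNTL before validating vertex data: */
#include <stdint.h>

struct vf_decode { unsigned prim_walk; unsigned nverts; };

static struct vf_decode decode_vap_vf_cntl(uint32_t cntl, uint32_t alt_nverts)
{
        struct vf_decode d;

        d.prim_walk = (cntl >> 4) & 0x3;          /* 1/2/3 as in the switch above */
        if (cntl & (1u << 14))                    /* alternate vertex-count source */
                d.nverts = alt_nverts;
        else
                d.nverts = (cntl >> 16) & 0xFFFF; /* NUM_VERTICES field */
        return d;
}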
track->textures[i].width_11 = 1 << 11; + track->textures[i].height_11 = 1 << 11; + track->textures[i].num_levels = 12; + if (rdev->family <= CHIP_RS200) { + track->textures[i].tex_coord_type = 0; + track->textures[i].txdepth = 0; + } else { + track->textures[i].txdepth = 16; + track->textures[i].tex_coord_type = 1; + } + track->textures[i].cpp = 64; + track->textures[i].robj = NULL; + /* CS IB emission code makes sure texture unit are disabled */ + track->textures[i].enabled = false; + track->textures[i].lookup_disable = false; + track->textures[i].roundup_w = true; + track->textures[i].roundup_h = true; + if (track->separate_cube) + for (face = 0; face < 5; face++) { + track->textures[i].cube_info[face].robj = NULL; + track->textures[i].cube_info[face].width = 16536; + track->textures[i].cube_info[face].height = 16536; + track->textures[i].cube_info[face].offset = 0; + } + } +} /* * Global GPU functions @@ -2175,6 +2652,15 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } +/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ +void r100_enable_bm(struct radeon_device *rdev) +{ + uint32_t tmp; + /* Enable bus mastering */ + tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; + WREG32(RADEON_BUS_CNTL, tmp); +} + void r100_bm_disable(struct radeon_device *rdev) { u32 tmp; @@ -3261,380 +3747,6 @@ void r100_bandwidth_update(struct radeon_device *rdev) } } -static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) -{ - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); -} - -static int r100_track_compress_size(int compress_format, int w, int h) -{ - int block_width, block_height, block_bytes; - int wblocks, hblocks; - int min_wblocks; - int sz; - - block_width = 4; - block_height = 4; - - switch (compress_format) { - case R100_TRACK_COMP_DXT1: - block_bytes = 8; - min_wblocks = 4; - break; - default: - case R100_TRACK_COMP_DXT35: - block_bytes = 16; - min_wblocks = 2; - break; - } - - hblocks = (h + block_height - 1) / block_height; - wblocks = (w + block_width - 1) / block_width; - if (wblocks < min_wblocks) - wblocks = min_wblocks; - sz = wblocks * hblocks * block_bytes; - return sz; -} - -static int r100_cs_track_cube(struct radeon_device *rdev, - struct r100_cs_track *track, unsigned idx) -{ - unsigned face, w, h; - struct radeon_bo *cube_robj; - unsigned long size; - unsigned compress_format = track->textures[idx].compress_format; - - for (face = 0; face < 5; face++) { - cube_robj = track->textures[idx].cube_info[face].robj; - w = track->textures[idx].cube_info[face].width; - h = track->textures[idx].cube_info[face].height; - - if (compress_format) { - size = r100_track_compress_size(compress_format, w, h); - } else - size = w * h; - size *= track->textures[idx].cpp; - - size += track->textures[idx].cube_info[face].offset; - - if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, 
radeon_bo_size(cube_robj)); - r100_cs_track_texture_print(&track->textures[idx]); - return -1; - } - } - return 0; -} - -static int r100_cs_track_texture_check(struct radeon_device *rdev, - struct r100_cs_track *track) -{ - struct radeon_bo *robj; - unsigned long size; - unsigned u, i, w, h, d; - int ret; - - for (u = 0; u < track->num_texture; u++) { - if (!track->textures[u].enabled) - continue; - if (track->textures[u].lookup_disable) - continue; - robj = track->textures[u].robj; - if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); - return -EINVAL; - } - size = 0; - for (i = 0; i <= track->textures[u].num_levels; i++) { - if (track->textures[u].use_pitch) { - if (rdev->family < CHIP_R300) - w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); - else - w = track->textures[u].pitch / (1 << i); - } else { - w = track->textures[u].width; - if (rdev->family >= CHIP_RV515) - w |= track->textures[u].width_11; - w = w / (1 << i); - if (track->textures[u].roundup_w) - w = roundup_pow_of_two(w); - } - h = track->textures[u].height; - if (rdev->family >= CHIP_RV515) - h |= track->textures[u].height_11; - h = h / (1 << i); - if (track->textures[u].roundup_h) - h = roundup_pow_of_two(h); - if (track->textures[u].tex_coord_type == 1) { - d = (1 << track->textures[u].txdepth) / (1 << i); - if (!d) - d = 1; - } else { - d = 1; - } - if (track->textures[u].compress_format) { - - size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; - /* compressed textures are block based */ - } else - size += w * h * d; - } - size *= track->textures[u].cpp; - - switch (track->textures[u].tex_coord_type) { - case 0: - case 1: - break; - case 2: - if (track->separate_cube) { - ret = r100_cs_track_cube(rdev, track, u); - if (ret) - return ret; - } else - size *= 6; - break; - default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); - return -EINVAL; - } - if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); - r100_cs_track_texture_print(&track->textures[u]); - return -EINVAL; - } - } - return 0; -} - -int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i; - unsigned long size; - unsigned prim_walk; - unsigned nverts; - unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; - - if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && - !track->blend_read_enable) - num_cb = 0; - - for (i = 0; i < num_cb; i++) { - if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); - return -EINVAL; - } - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; - size += track->cb[i].offset; - if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); - return -EINVAL; - } - } - track->cb_dirty = false; - - if (track->zb_dirty && track->z_enabled) { - if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); - return -EINVAL; - } - size = track->zb.pitch * track->zb.cpp * track->maxy; - size += track->zb.offset; - if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); - return -EINVAL; - } - } - track->zb_dirty = false; - - if (track->aa_dirty && track->aaresolve) { - if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); - return -EINVAL; - } - /* I believe the format comes from colorbuffer0. */ - size = track->aa.pitch * track->cb[0].cpp * track->maxy; - size += track->aa.offset; - if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); - return -EINVAL; - } - } - track->aa_dirty = false; - - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - if (track->vap_vf_cntl & (1 << 14)) { - nverts = track->vap_alt_nverts; - } else { - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; - } - switch (prim_walk) { - case 1: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); - return -EINVAL; - } - } - break; - case 2: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - return -EINVAL; - } - } - break; - case 3: - size = track->vtx_size * nverts; - if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); - return -EINVAL; - } - break; - default: - 
DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); - return -EINVAL; - } - - if (track->tex_dirty) { - track->tex_dirty = false; - return r100_cs_track_texture_check(rdev, track); - } - return 0; -} - -void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i, face; - - track->cb_dirty = true; - track->zb_dirty = true; - track->tex_dirty = true; - track->aa_dirty = true; - - if (rdev->family < CHIP_R300) { - track->num_cb = 1; - if (rdev->family <= CHIP_RS200) - track->num_texture = 3; - else - track->num_texture = 6; - track->maxy = 2048; - track->separate_cube = 1; - } else { - track->num_cb = 4; - track->num_texture = 16; - track->maxy = 4096; - track->separate_cube = 0; - track->aaresolve = false; - track->aa.robj = NULL; - } - - for (i = 0; i < track->num_cb; i++) { - track->cb[i].robj = NULL; - track->cb[i].pitch = 8192; - track->cb[i].cpp = 16; - track->cb[i].offset = 0; - } - track->z_enabled = true; - track->zb.robj = NULL; - track->zb.pitch = 8192; - track->zb.cpp = 4; - track->zb.offset = 0; - track->vtx_size = 0x7F; - track->immd_dwords = 0xFFFFFFFFUL; - track->num_arrays = 11; - track->max_indx = 0x00FFFFFFUL; - for (i = 0; i < track->num_arrays; i++) { - track->arrays[i].robj = NULL; - track->arrays[i].esize = 0x7F; - } - for (i = 0; i < track->num_texture; i++) { - track->textures[i].compress_format = R100_TRACK_COMP_NONE; - track->textures[i].pitch = 16536; - track->textures[i].width = 16536; - track->textures[i].height = 16536; - track->textures[i].width_11 = 1 << 11; - track->textures[i].height_11 = 1 << 11; - track->textures[i].num_levels = 12; - if (rdev->family <= CHIP_RS200) { - track->textures[i].tex_coord_type = 0; - track->textures[i].txdepth = 0; - } else { - track->textures[i].txdepth = 16; - track->textures[i].tex_coord_type = 1; - } - track->textures[i].cpp = 64; - track->textures[i].robj = NULL; - /* CS IB emission code makes sure texture unit are disabled */ - track->textures[i].enabled = false; - track->textures[i].lookup_disable = false; - track->textures[i].roundup_w = true; - track->textures[i].roundup_h = true; - if (track->separate_cube) - for (face = 0; face < 5; face++) { - track->textures[i].cube_info[face].robj = NULL; - track->textures[i].cube_info[face].width = 16536; - track->textures[i].cube_info[face].height = 16536; - track->textures[i].cube_info[face].offset = 0; - } - } -} - int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; @@ -3679,6 +3791,12 @@ void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + if (ring->rptr_save_reg) { + u32 next_rptr = ring->wptr + 2 + 3; + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); + radeon_ring_write(ring, next_rptr); + } + radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1)); radeon_ring_write(ring, ib->gpu_addr); radeon_ring_write(ring, ib->length_dw); @@ -3711,7 +3829,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[6] = PACKET2(0); ib.ptr[7] = PACKET2(0); ib.length_dw = 8; - r = radeon_ib_schedule(rdev, &ib); + r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -3740,12 +3858,6 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } -void r100_ib_fini(struct radeon_device *rdev) -{ - radeon_ib_pool_suspend(rdev); - radeon_ib_pool_fini(rdev); -} - void 
r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save) { /* Shutdown CP we shouldn't need to do that but better be safe than @@ -3905,13 +4017,11 @@ static int r100_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -3948,7 +4058,6 @@ int r100_resume(struct radeon_device *rdev) int r100_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -3961,7 +4070,7 @@ void r100_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); @@ -4068,20 +4177,14 @@ int r100_init(struct radeon_device *rdev) } r100_set_safe_registers(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r100_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a26144d01207..f0889259eb08 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 97722a33e513..646a1927dda7 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1391,13 +1391,11 @@ static int r300_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -1436,7 +1434,6 @@ int r300_resume(struct radeon_device *rdev) int r300_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -1451,7 +1448,7 @@ void r300_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); @@ -1538,20 +1535,14 @@ int r300_init(struct radeon_device *rdev) } r300_set_reg_safe(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = 
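/* [Editor's sketch of the pattern repeated for r100/r200/r300/r420/r520 in
 * the hunks around here; names are generic stand-ins] IB pool setup moves
 * out of the one-time <asic>_init() path and into <asic>_startup(), which
 * also runs on resume, and startup now reports the failure itself: */
static int mock_ib_pool_init(void) { return 0; }

static int mock_asic_startup(void)
{
        /* ...gart, rings, wb, irq already brought up... */
        if (mock_ib_pool_init()) {
                /* previously handled once in <asic>_init() */
                return -1;  /* dev_err(...) then bail in the real code */
        }
        return 0;
}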
r300_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 99137be7a300..f2f5bf6d339f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -275,13 +275,11 @@ static int r420_startup(struct radeon_device *rdev) } r420_cp_errata_init(rdev); - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -324,7 +322,6 @@ int r420_resume(struct radeon_device *rdev) int r420_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r420_cp_errata_fini(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); @@ -340,7 +337,7 @@ void r420_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); @@ -438,20 +435,14 @@ int r420_init(struct radeon_device *rdev) } r420_set_reg_safe(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r420_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index b5cf8375cd25..079d3c52c08a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -203,13 +203,11 @@ static int r520_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -311,20 +309,14 @@ int r520_init(struct radeon_device *rdev) return r; rv515_set_safe_registers(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r520_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); radeon_agp_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index bff627293812..d79c639ae739 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -709,6 +709,7 @@ void r600_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -729,28 +730,22 @@ void r600_hpd_init(struct radeon_device *rdev) 
switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; @@ -759,85 +754,73 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[2] = true; break; default: break; } } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r600_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); } void r600_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0; - if (ASIC_IS_DCE3(rdev)) { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + if (ASIC_IS_DCE3(rdev)) { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } - } - } else { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + } else { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; default: break; } } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ -1306,6 +1289,14 @@ int r600_gpu_soft_reset(struct radeon_device *rdev) RREG32(R_008014_GRBM_STATUS2)); dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n", RREG32(R_000E50_SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); rv515_mc_stop(rdev, &save); if (r600_mc_wait_for_idle(rdev)) { dev_warn(rdev->dev, "Wait for MC idle 
timedout !\n"); @@ -1349,6 +1340,14 @@ int r600_gpu_soft_reset(struct radeon_device *rdev) RREG32(R_008014_GRBM_STATUS2)); dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n", RREG32(R_000E50_SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); rv515_mc_resume(rdev, &save); return 0; } @@ -2172,18 +2171,29 @@ int r600_cp_resume(struct radeon_device *rdev) void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size) { u32 rb_bufsz; + int r; /* Align ring size */ rb_bufsz = drm_order(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; ring->ring_size = ring_size; ring->align_mask = 16 - 1; + + if (radeon_ring_supports_scratch_reg(rdev, ring)) { + r = radeon_scratch_get(rdev, &ring->rptr_save_reg); + if (r) { + DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); + ring->rptr_save_reg = 0; + } + } } void r600_cp_fini(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r600_cp_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + radeon_ring_fini(rdev, ring); + radeon_scratch_free(rdev, ring->rptr_save_reg); } @@ -2206,7 +2216,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; uint32_t tmp = 0; - unsigned i, ridx = radeon_ring_index(rdev, ring); + unsigned i; int r; r = radeon_scratch_get(rdev, &scratch); @@ -2217,7 +2227,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) WREG32(scratch, 0xCAFEDEAD); r = radeon_ring_lock(rdev, ring, 3); if (r) { - DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r); + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r); radeon_scratch_free(rdev, scratch); return r; } @@ -2232,10 +2242,10 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_UDELAY(1); } if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ridx, i); + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ridx, scratch, tmp); + ring->idx, scratch, tmp); r = -EINVAL; } radeon_scratch_free(rdev, scratch); @@ -2309,34 +2319,21 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { + struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r; - r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); + r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb); + r600_blit_done_copy(rdev, fence, vb, sem); return 0; } -void r600_blit_suspend(struct radeon_device *rdev) -{ - int r; - - /* unpin shaders bo */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (!r) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - } -} - int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ 
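/* [Editor's illustration: a host-side mock of the test pattern visible in
 * r600_ring_test() above] The ring and IB tests seed a scratch register
 * with 0xCAFEDEAD, ask the CP to write 0xDEADBEEF, then poll until the
 * value flips or the budget runs out: */
#include <stdint.h>
#include <stdio.h>

static uint32_t mock_scratch;

static void mock_cp_runs_packet(void) { mock_scratch = 0xDEADBEEF; }

int main(void)
{
        int i, timeout = 100000;   /* stands in for rdev->usec_timeout */

        mock_scratch = 0xCAFEDEAD; /* poison */
        mock_cp_runs_packet();     /* the GPU would do this asynchronously */
        for (i = 0; i < timeout; i++)
                if (mock_scratch == 0xDEADBEEF)
                        break;
        printf(i < timeout ? "ring test ok (%d polls)\n"
                           : "ring test failed (%d)\n", i);
        return 0;
}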
-2419,13 +2416,11 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) { @@ -2475,9 +2470,6 @@ int r600_resume(struct radeon_device *rdev) int r600_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - radeon_ib_pool_suspend(rdev); - r600_blit_suspend(rdev); - /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; r600_irq_suspend(rdev); @@ -2559,20 +2551,14 @@ int r600_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r600_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -2588,7 +2574,7 @@ void r600_fini(struct radeon_device *rdev) r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -2607,9 +2593,24 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 next_rptr; + + if (ring->rptr_save_reg) { + next_rptr = ring->wptr + 3 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(ring, next_rptr); + } else if (rdev->wb.enabled) { + next_rptr = ring->wptr + 5 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18)); + radeon_ring_write(ring, next_rptr); + radeon_ring_write(ring, 0); + } - /* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -2627,7 +2628,6 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) uint32_t tmp = 0; unsigned i; int r; - int ring_index = radeon_ring_index(rdev, ring); r = radeon_scratch_get(rdev, &scratch); if (r) { @@ -2635,7 +2635,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, ring_index, &ib, 256); + r = radeon_ib_get(rdev, ring->idx, &ib, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; @@ -2644,7 +2644,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib); + r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -2857,7 +2857,6 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; 
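/* [Editor's note: the same accounting as the r100 case, with r6xx packet
 * sizes] In r600_ring_ib_execute() below, the rptr snapshot is placed after
 * both the save packet and the 4-dword INDIRECT_BUFFER packet: */
#include <stdbool.h>
#include <stdint.h>

static uint32_t r6xx_next_rptr(uint32_t wptr, bool have_scratch_reg)
{
        /* SET_CONFIG_REG: header + reg offset + value            = 3 dwords
         * MEM_WRITE:      header + addr lo + addr hi + value + 0 = 5 dwords */
        const uint32_t save_dw = have_scratch_reg ? 3 : 5;
        const uint32_t ib_dw = 4;  /* PACKET3(INDIRECT_BUFFER, 2) + 3 payload */

        return wptr + save_dw + ib_dw;
}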
rdev->ih.rptr = 0; } @@ -3042,18 +3041,18 @@ int r600_irq_set(struct radeon_device *rdev) hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3309,7 +3308,6 @@ int r600_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -3321,24 +3319,21 @@ int r600_irq_process(struct radeon_device *rdev) RREG32(IH_RB_WPTR); wptr = r600_get_ih_wptr(rdev); - rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - spin_lock_irqsave(&rdev->ih.lock, flags); - - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) return IRQ_NONE; - } -restart_ih: + rptr = rdev->ih.rptr; + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ r600_irq_ack(rdev); - rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3355,7 +3350,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3381,7 +3376,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3480,7 +3475,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -3492,17 +3486,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = r600_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = r600_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; } @@ -3685,6 +3681,8 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) { u32 link_width_cntl, lanes, speed_cntl, training_cntl, tmp; u16 link_cntl2; + u32 mask; + int ret; if (radeon_pcie_gen2 == 0) return; @@ -3703,6 +3701,15 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) if (rdev->family <= CHIP_R600) return; + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; + + if (!(mask & DRM_PCIE_SPEED_50)) + return; 
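/* [Editor's sketch using C11 atomics in place of the kernel's atomic_t]
 * The reworked r600_irq_process() below replaces the spinlock with an
 * ownership flag: atomic_xchg() admits one processor at a time, and after
 * releasing the flag the wptr is re-read so entries that arrived meanwhile
 * restart the loop instead of being lost. */
#include <stdatomic.h>

static atomic_int mock_ih_lock;
static unsigned mock_ih_rptr, mock_ih_wptr;

static void mock_process_entry(unsigned rptr) { (void)rptr; }

static int mock_irq_process(void)
{
        unsigned rptr, wptr = mock_ih_wptr;

restart:
        if (atomic_exchange(&mock_ih_lock, 1))
                return 0;                   /* someone else owns the ring */
        rptr = mock_ih_rptr;
        while (rptr != wptr) {
                mock_process_entry(rptr);
                rptr += 16;                 /* IH ring entries are 16 bytes */
        }
        mock_ih_rptr = rptr;
        atomic_store(&mock_ih_lock, 0);
        wptr = mock_ih_wptr;                /* re-read the hardware wptr */
        if (wptr != rptr)
                goto restart;               /* new entries arrived: go again */
        return 1;
}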
+ + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + /* 55 nm r6xx asics */ if ((rdev->family == CHIP_RV670) || (rdev->family == CHIP_RV620) || @@ -3782,3 +3789,23 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } } + +/** + * r600_get_gpu_clock - return GPU clock counter snapshot + * + * @rdev: radeon_device pointer + * + * Fetches a GPU clock counter snapshot (R6xx-cayman). + * Returns the 64 bit clock counter snapshot. + */ +uint64_t r600_get_gpu_clock(struct radeon_device *rdev) +{ + uint64_t clock; + + mutex_lock(&rdev->gpu_clock_mutex); + WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&rdev->gpu_clock_mutex); + return clock; +} diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 03b6e0d3d503..2bef8549ddfe 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ @@ -523,10 +524,6 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.max_dim = 8192; - /* pin copy shader into vram if already initialized */ - if (rdev->r600_blit.shader_obj) - goto done; - rdev->r600_blit.state_offset = 0; if (rdev->family >= CHIP_RV770) @@ -551,11 +548,26 @@ int r600_blit_init(struct radeon_device *rdev) obj_size += r6xx_ps_size * 4; obj_size = ALIGN(obj_size, 256); - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("r600 failed to allocate shader\n"); - return r; + /* pin copy shader into vram if not already initialized */ + if (rdev->r600_blit.shader_obj == NULL) { + r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, + NULL, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate shader\n"); + return r; + } + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } } DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n", @@ -586,17 +598,6 @@ int r600_blit_init(struct radeon_device *rdev) radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); -done: - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned 
num_gpu_pages, int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb) + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -689,34 +691,50 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; } + r = radeon_semaphore_create(rdev, sem); + if (r) { + radeon_sa_bo_free(rdev, vb, NULL); + return r; + } + /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL); + radeon_semaphore_free(rdev, sem, NULL); return r; } + if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { + radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, + RADEON_RING_TYPE_GFX_INDEX); + radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); + } else { + radeon_semaphore_free(rdev, sem, NULL); + } + rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0; } -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, - struct radeon_sa_bo *vb) +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, + struct radeon_sa_bo *vb, struct radeon_semaphore *sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { radeon_ring_unlock_undo(rdev, ring); return; } radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, fence); + radeon_sa_bo_free(rdev, &vb, *fence); + radeon_semaphore_free(rdev, &sem, *fence); } void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index ca87f7afaf23..ab74e6b149e7 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -47,13 +47,17 @@ struct r600_cs_track { u32 npipes; /* value we track */ u32 sq_config; + u32 log_nsamples; u32 nsamples; u32 cb_color_base_last[8]; struct radeon_bo *cb_color_bo[8]; u64 cb_color_bo_mc[8]; - u32 cb_color_bo_offset[8]; - struct radeon_bo *cb_color_frag_bo[8]; /* unused */ - struct radeon_bo *cb_color_tile_bo[8]; /* unused */ + u64 cb_color_bo_offset[8]; + struct radeon_bo *cb_color_frag_bo[8]; + u64 cb_color_frag_offset[8]; + struct radeon_bo *cb_color_tile_bo[8]; + u64 cb_color_tile_offset[8]; + u32 cb_color_mask[8]; u32 cb_color_info[8]; u32 cb_color_view[8]; u32 cb_color_size_idx[8]; /* unused */ @@ -349,10 +353,6 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) unsigned array_mode; u32 format; - if (G_0280A0_TILE_MODE(track->cb_color_info[i])) { - dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n"); - return -EINVAL; - } size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i]; format = G_0280A0_FORMAT(track->cb_color_info[i]); if (!r600_fmt_is_valid_color(format)) { @@ -420,7 +420,8 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) } /* check offset */ - tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) * r600_fmt_get_blocksize(format); + tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) * + r600_fmt_get_blocksize(format) * track->nsamples; switch (array_mode) { default: case 
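/* [Editor's restatement of the decision added to r600_blit_prepare_copy()
 * below; the field names are stand-ins, not the kernel's] A semaphore wait
 * is only kept when the previous fence lives on a different ring that the
 * GFX ring has not already synchronized with: */
#include <stdbool.h>
#include <stddef.h>

struct mock_fence_state {
        int ring;            /* ring the fence signals on */
        bool synced_to_gfx;  /* has radeon_fence_note_sync() already run? */
};

static bool mock_need_semaphore(const struct mock_fence_state *f, int gfx_ring)
{
        if (f == NULL)
                return false;             /* nothing to wait for */
        if (f->ring == gfx_ring)
                return false;             /* same ring: ordering is implicit */
        return !f->synced_to_gfx;         /* cross-ring and not yet synced */
}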
V_0280A0_ARRAY_LINEAR_GENERAL: @@ -441,7 +442,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) * broken userspace. */ } else { - dev_warn(p->dev, "%s offset[%d] %d %d %d %lu too big (%d %d) (%d %d %d)\n", + dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", __func__, i, array_mode, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]), @@ -458,6 +459,51 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) | S_028060_SLICE_TILE_MAX(slice_tile_max - 1); ib[track->cb_color_size_idx[i]] = tmp; + + /* FMASK/CMASK */ + switch (G_0280A0_TILE_MODE(track->cb_color_info[i])) { + case V_0280A0_TILE_DISABLE: + break; + case V_0280A0_FRAG_ENABLE: + if (track->nsamples > 1) { + uint32_t tile_max = G_028100_FMASK_TILE_MAX(track->cb_color_mask[i]); + /* the tile size is 8x8, but the size is in units of bits. + * for bytes, do just * 8. */ + uint32_t bytes = track->nsamples * track->log_nsamples * 8 * (tile_max + 1); + + if (bytes + track->cb_color_frag_offset[i] > + radeon_bo_size(track->cb_color_frag_bo[i])) { + dev_warn(p->dev, "%s FMASK_TILE_MAX too large " + "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, tile_max, bytes, + track->cb_color_frag_offset[i], + radeon_bo_size(track->cb_color_frag_bo[i])); + return -EINVAL; + } + } + /* fall through */ + case V_0280A0_CLEAR_ENABLE: + { + uint32_t block_max = G_028100_CMASK_BLOCK_MAX(track->cb_color_mask[i]); + /* One block = 128x128 pixels, one 8x8 tile has 4 bits.. + * (128*128) / (8*8) / 2 = 128 bytes per block. */ + uint32_t bytes = (block_max + 1) * 128; + + if (bytes + track->cb_color_tile_offset[i] > + radeon_bo_size(track->cb_color_tile_bo[i])) { + dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large " + "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, block_max, bytes, + track->cb_color_tile_offset[i], + radeon_bo_size(track->cb_color_tile_bo[i])); + return -EINVAL; + } + break; + } + default: + dev_warn(p->dev, "%s invalid tile mode\n", __func__); + return -EINVAL; + } return 0; } @@ -566,7 +612,7 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; - tmp = ntiles * bpe * 64 * nviews; + tmp = ntiles * bpe * 64 * nviews * track->nsamples; if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", array_mode, @@ -764,8 +810,10 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) } /* Check depth buffer */ - if (track->db_dirty && (G_028800_STENCIL_ENABLE(track->db_depth_control) || - G_028800_Z_ENABLE(track->db_depth_control))) { + if (track->db_dirty && + G_028010_FORMAT(track->db_depth_info) != V_028010_DEPTH_INVALID && + (G_028800_STENCIL_ENABLE(track->db_depth_control) || + G_028800_Z_ENABLE(track->db_depth_control))) { r = r600_cs_track_validate_db(p); if (r) return r; @@ -1229,6 +1277,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case R_028C04_PA_SC_AA_CONFIG: tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx)); + track->log_nsamples = tmp; track->nsamples = 1 << tmp; track->cb_dirty = true; break; @@ -1310,16 +1359,21 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "Broken old userspace ? 
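/* [Editor's check of the CMASK sizing comment above] A clear-mask block
 * covers 128x128 pixels and each 8x8 tile holds 4 bits, so: */
#include <assert.h>

int main(void)
{
        unsigned tiles_per_block = (128 * 128) / (8 * 8);  /* = 256 tiles */
        unsigned bytes_per_block = tiles_per_block / 2;    /* 4 bits = 1/2 byte */

        assert(bytes_per_block == 128);  /* hence (CMASK_BLOCK_MAX + 1) * 128 */
        return 0;
}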
no cb_color0_base supplied before trying to write 0x%08X\n", reg); return -EINVAL; } - ib[idx] = track->cb_color_base_last[tmp]; track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp]; + track->cb_color_frag_offset[tmp] = track->cb_color_bo_offset[tmp]; + ib[idx] = track->cb_color_base_last[tmp]; } else { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->cb_color_frag_bo[tmp] = reloc->robj; + track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; } break; case R_0280C0_CB_COLOR0_TILE: @@ -1336,16 +1390,35 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg); return -EINVAL; } - ib[idx] = track->cb_color_base_last[tmp]; track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp]; + track->cb_color_tile_offset[tmp] = track->cb_color_bo_offset[tmp]; + ib[idx] = track->cb_color_base_last[tmp]; } else { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->cb_color_tile_bo[tmp] = reloc->robj; + track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; + } + break; + case R_028100_CB_COLOR0_MASK: + case R_028104_CB_COLOR1_MASK: + case R_028108_CB_COLOR2_MASK: + case R_02810C_CB_COLOR3_MASK: + case R_028110_CB_COLOR4_MASK: + case R_028114_CB_COLOR5_MASK: + case R_028118_CB_COLOR6_MASK: + case R_02811C_CB_COLOR7_MASK: + tmp = (reg - R_028100_CB_COLOR0_MASK) / 4; + track->cb_color_mask[tmp] = ib[idx]; + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; } break; case CB_COLOR0_BASE: @@ -1490,7 +1563,7 @@ unsigned r600_mip_minify(unsigned size, unsigned level) } static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, - unsigned w0, unsigned h0, unsigned d0, unsigned format, + unsigned w0, unsigned h0, unsigned d0, unsigned nsamples, unsigned format, unsigned block_align, unsigned height_align, unsigned base_align, unsigned *l0_size, unsigned *mipmap_size) { @@ -1518,7 +1591,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, depth = r600_mip_minify(d0, i); - size = nbx * nby * blocksize; + size = nbx * nby * blocksize * nsamples; if (nfaces) size *= nfaces; else @@ -1557,13 +1630,14 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, u32 tiling_flags) { struct r600_cs_track *track = p->track; - u32 nfaces, llevel, blevel, w0, h0, d0; - u32 word0, word1, l0_size, mipmap_size, word2, word3; + u32 dim, nfaces, llevel, blevel, w0, h0, d0; + u32 word0, word1, l0_size, mipmap_size, word2, word3, word4, word5; u32 height_align, pitch, pitch_align, depth_align; - u32 array, barray, larray; + u32 barray, larray; u64 base_align; struct array_mode_checker array_check; u32 format; + bool is_array; /* on legacy kernel we don't perform advanced check */ if (p->rdev == NULL) @@ -1581,12 +1655,28 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, word0 |= 
S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); } word1 = radeon_get_ib_value(p, idx + 1); + word2 = radeon_get_ib_value(p, idx + 2) << 8; + word3 = radeon_get_ib_value(p, idx + 3) << 8; + word4 = radeon_get_ib_value(p, idx + 4); + word5 = radeon_get_ib_value(p, idx + 5); + dim = G_038000_DIM(word0); w0 = G_038000_TEX_WIDTH(word0) + 1; + pitch = (G_038000_PITCH(word0) + 1) * 8; h0 = G_038004_TEX_HEIGHT(word1) + 1; d0 = G_038004_TEX_DEPTH(word1); + format = G_038004_DATA_FORMAT(word1); + blevel = G_038010_BASE_LEVEL(word4); + llevel = G_038014_LAST_LEVEL(word5); + /* pitch in texels */ + array_check.array_mode = G_038000_TILE_MODE(word0); + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = 1; + array_check.blocksize = r600_fmt_get_blocksize(format); nfaces = 1; - array = 0; - switch (G_038000_DIM(word0)) { + is_array = false; + switch (dim) { case V_038000_SQ_TEX_DIM_1D: case V_038000_SQ_TEX_DIM_2D: case V_038000_SQ_TEX_DIM_3D: @@ -1599,29 +1689,25 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, break; case V_038000_SQ_TEX_DIM_1D_ARRAY: case V_038000_SQ_TEX_DIM_2D_ARRAY: - array = 1; + is_array = true; break; - case V_038000_SQ_TEX_DIM_2D_MSAA: case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA: + is_array = true; + /* fall through */ + case V_038000_SQ_TEX_DIM_2D_MSAA: + array_check.nsamples = 1 << llevel; + llevel = 0; + break; default: dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); return -EINVAL; } - format = G_038004_DATA_FORMAT(word1); if (!r600_fmt_is_valid_texture(format, p->family)) { dev_warn(p->dev, "%s:%d texture invalid format %d\n", __func__, __LINE__, format); return -EINVAL; } - /* pitch in texels */ - pitch = (G_038000_PITCH(word0) + 1) * 8; - array_check.array_mode = G_038000_TILE_MODE(word0); - array_check.group_size = track->group_size; - array_check.nbanks = track->nbanks; - array_check.npipes = track->npipes; - array_check.nsamples = 1; - array_check.blocksize = r600_fmt_get_blocksize(format); if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", @@ -1647,24 +1733,17 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, return -EINVAL; } - word2 = radeon_get_ib_value(p, idx + 2) << 8; - word3 = radeon_get_ib_value(p, idx + 3) << 8; - - word0 = radeon_get_ib_value(p, idx + 4); - word1 = radeon_get_ib_value(p, idx + 5); - blevel = G_038010_BASE_LEVEL(word0); - llevel = G_038014_LAST_LEVEL(word1); if (blevel > llevel) { dev_warn(p->dev, "texture blevel %d > llevel %d\n", blevel, llevel); } - if (array == 1) { - barray = G_038014_BASE_ARRAY(word1); - larray = G_038014_LAST_ARRAY(word1); + if (is_array) { + barray = G_038014_BASE_ARRAY(word5); + larray = G_038014_LAST_ARRAY(word5); nfaces = larray - barray + 1; } - r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, format, + r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, array_check.nsamples, format, pitch_align, height_align, base_align, &l0_size, &mipmap_size); /* using get ib will give us the offset into the texture bo */ @@ -1677,7 +1756,6 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, return -EINVAL; } /* using get ib will give us the offset into the mipmap bo */ - word3 = radeon_get_ib_value(p, idx + 3) << 8; if ((mipmap_size + word3) > radeon_bo_size(mipmap)) { /*dev_warn(p->dev, "mipmap bo 
too small (%d %d %d %d %d %d -> %d have %ld)\n", w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/ diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 82a0a4c919c0..e3558c3ef24a 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -519,8 +519,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder) if (rdev->irq.installed) { /* if irq is available use it */ - rdev->irq.afmt[dig->afmt->id] = true; - radeon_irq_set(rdev); + radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); } dig->afmt->enabled = true; @@ -556,8 +555,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id); /* disable irq */ - rdev->irq.afmt[dig->afmt->id] = false; - radeon_irq_set(rdev); + radeon_irq_kms_disable_afmt(rdev, dig->afmt->id); /* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 025fd5b6c08c..bdb69a63062f 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -92,6 +92,20 @@ #define R_028094_CB_COLOR5_VIEW 0x028094 #define R_028098_CB_COLOR6_VIEW 0x028098 #define R_02809C_CB_COLOR7_VIEW 0x02809C +#define R_028100_CB_COLOR0_MASK 0x028100 +#define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) +#define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) +#define C_028100_CMASK_BLOCK_MAX 0xFFFFF000 +#define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) +#define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) +#define C_028100_FMASK_TILE_MAX 0x00000FFF +#define R_028104_CB_COLOR1_MASK 0x028104 +#define R_028108_CB_COLOR2_MASK 0x028108 +#define R_02810C_CB_COLOR3_MASK 0x02810C +#define R_028110_CB_COLOR4_MASK 0x028110 +#define R_028114_CB_COLOR5_MASK 0x028114 +#define R_028118_CB_COLOR6_MASK 0x028118 +#define R_02811C_CB_COLOR7_MASK 0x02811C #define CB_COLOR0_INFO 0x280a0 # define CB_FORMAT(x) ((x) << 2) # define CB_ARRAY_MODE(x) ((x) << 8) @@ -153,6 +167,9 @@ #define CONFIG_MEMSIZE 0x5428 #define CONFIG_CNTL 0x5424 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C #define CP_STAT 0x8680 #define CP_COHER_BASE 0x85F8 #define CP_DEBUG 0xC1FC @@ -599,6 +616,9 @@ #define RLC_HB_WPTR 0x3f1c #define RLC_HB_WPTR_LSB_ADDR 0x3f14 #define RLC_HB_WPTR_MSB_ADDR 0x3f18 +#define RLC_GPU_CLOCK_COUNT_LSB 0x3f38 +#define RLC_GPU_CLOCK_COUNT_MSB 0x3f3c +#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x3f40 #define RLC_MC_CNTL 0x3f44 #define RLC_UCODE_CNTL 0x3f48 #define RLC_UCODE_ADDR 0x3f2c @@ -1394,6 +1414,9 @@ #define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) #define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) #define C_0280A0_TILE_MODE 0xFFF3FFFF +#define V_0280A0_TILE_DISABLE 0 +#define V_0280A0_CLEAR_ENABLE 1 +#define V_0280A0_FRAG_ENABLE 2 #define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) #define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) #define C_0280A0_BLEND_CLAMP 0xFFEFFFFF diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fefcca55c1eb..59a15315ae9f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,7 +113,6 @@ extern int radeon_lockup_timeout; /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL -#define RADEON_FENCE_NOTEMITED_SEQ (~0LL) /* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -143,65 +142,8 @@ struct radeon_device; /* * BIOS. 
*/ -#define ATRM_BIOS_PAGE 4096 - -#if defined(CONFIG_VGA_SWITCHEROO) -bool radeon_atrm_supported(struct pci_dev *pdev); -int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len); -#else -static inline bool radeon_atrm_supported(struct pci_dev *pdev) -{ - return false; -} - -static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){ - return -EINVAL; -} -#endif bool radeon_get_bios(struct radeon_device *rdev); - -/* - * Mutex which allows recursive locking from the same process. - */ -struct radeon_mutex { - struct mutex mutex; - struct task_struct *owner; - int level; -}; - -static inline void radeon_mutex_init(struct radeon_mutex *mutex) -{ - mutex_init(&mutex->mutex); - mutex->owner = NULL; - mutex->level = 0; -} - -static inline void radeon_mutex_lock(struct radeon_mutex *mutex) -{ - if (mutex_trylock(&mutex->mutex)) { - /* The mutex was unlocked before, so it's ours now */ - mutex->owner = current; - } else if (mutex->owner != current) { - /* Another process locked the mutex, take it */ - mutex_lock(&mutex->mutex); - mutex->owner = current; - } - /* Otherwise the mutex was already locked by this process */ - - mutex->level++; -} - -static inline void radeon_mutex_unlock(struct radeon_mutex *mutex) -{ - if (--mutex->level > 0) - return; - - mutex->owner = NULL; - mutex_unlock(&mutex->mutex); -} - - /* * Dummy page */ @@ -258,8 +200,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; - /* seq is protected by ring emission lock */ - uint64_t seq; + /* sync_seq is protected by ring emission lock */ + uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; unsigned long last_activity; bool initialized; @@ -277,19 +219,39 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); +bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); +void radeon_fence_note_sync(struct radeon_fence *fence, int ring); +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, + struct radeon_fence *b) +{ + if (!a) { + return b; + } + + if (!b) { + return a; + } + + BUG_ON(a->ring != b->ring); + + if (a->seq > b->seq) { + return a; + } else { + return b; + } +} /* * Tiling registers @@ -323,6 +285,7 @@ struct radeon_bo_va { uint64_t soffset; uint64_t eoffset; uint32_t flags; + struct radeon_fence *fence; bool valid; }; @@ -385,7 +348,7 @@ struct radeon_bo_list { * alignment). 
*/ struct radeon_sa_manager { - spinlock_t lock; + wait_queue_head_t wq; struct radeon_bo *bo; struct list_head *hole; struct list_head flist[RADEON_NUM_RINGS]; @@ -451,10 +414,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring); + int signaler, int waiter); void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence); /* @@ -597,21 +559,18 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6 struct radeon_irq { - bool installed; - bool sw_int[RADEON_NUM_RINGS]; - bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - wait_queue_head_t vblank_queue; - bool hpd[RADEON_MAX_HPD_PINS]; - bool gui_idle; - bool gui_idle_acked; - wait_queue_head_t idle_queue; - bool afmt[RADEON_MAX_AFMT_BLOCKS]; - spinlock_t sw_lock; - int sw_refcount[RADEON_NUM_RINGS]; - union radeon_irq_stat_regs stat_regs; - spinlock_t pflip_lock[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + bool installed; + spinlock_t lock; + atomic_t ring_int[RADEON_NUM_RINGS]; + bool crtc_vblank_int[RADEON_MAX_CRTCS]; + atomic_t pflip[RADEON_MAX_CRTCS]; + wait_queue_head_t vblank_queue; + bool hpd[RADEON_MAX_HPD_PINS]; + bool gui_idle; + bool gui_idle_acked; + wait_queue_head_t idle_queue; + bool afmt[RADEON_MAX_AFMT_BLOCKS]; + union radeon_irq_stat_regs stat_regs; }; int radeon_irq_kms_init(struct radeon_device *rdev); @@ -620,6 +579,11 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask); +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev); /* * CP & rings. 
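Worth noting in the radeon_irq rework above: the per-consumer bool arrays and dedicated spinlocks give way to a single irq.lock plus atomic_t reference counts, so ring and pageflip interrupt users can stack get/put requests without racing radeon_irq_set(). A rough sketch of the pattern the new helpers follow (the body here is an assumption drawn from the declarations, not quoted from the patch):

/* sketch: the first reference enables the ring interrupt; the hw
 * enable bits are only reprogrammed under irq.lock */
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
{
	unsigned long irqflags;

	if (!rdev->ddev->irq_enabled)
		return;

	if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) {
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}
}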
@@ -630,9 +594,11 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; + int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; }; @@ -642,6 +608,9 @@ struct radeon_ring { unsigned rptr; unsigned rptr_offs; unsigned rptr_reg; + unsigned rptr_save_reg; + u64 next_rptr_gpu_addr; + volatile u32 *next_rptr_cpu_addr; unsigned wptr; unsigned wptr_old; unsigned wptr_reg; @@ -657,6 +626,7 @@ struct radeon_ring { u32 ptr_reg_shift; u32 ptr_reg_mask; u32 nop; + u32 idx; }; /* @@ -690,6 +660,7 @@ struct radeon_vm_funcs { }; struct radeon_vm_manager { + struct mutex lock; struct list_head lru_vm; uint32_t use_bitmap; struct radeon_sa_manager sa_manager; @@ -718,13 +689,10 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned rptr_offs; - unsigned wptr; - unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask; - spinlock_t lock; + atomic_t lock; bool enabled; }; @@ -757,8 +725,6 @@ struct r600_blit { u32 state_len; }; -void r600_blit_suspend(struct radeon_device *rdev); - /* * SI RLC stuff */ @@ -774,14 +740,14 @@ struct si_rlc { int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); -int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); +int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); -int radeon_ib_pool_start(struct radeon_device *rdev); -int radeon_ib_pool_suspend(struct radeon_device *rdev); int radeon_ib_ring_tests(struct radeon_device *rdev); /* Ring access between begin & end cannot sleep */ -int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp); +bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, + struct radeon_ring *ring); void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); @@ -793,6 +759,10 @@ int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); void radeon_ring_lockup_update(struct radeon_ring *ring); bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); +unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, + uint32_t **data); +int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned size, uint32_t *data); int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop); @@ -891,6 +861,7 @@ struct radeon_wb { }; #define RADEON_WB_SCRATCH_OFFSET 0 +#define RADEON_WB_RING0_NEXT_RPTR 256 #define RADEON_WB_CP_RPTR_OFFSET 1024 #define RADEON_WB_CP1_RPTR_OFFSET 1280 #define RADEON_WB_CP2_RPTR_OFFSET 1536 @@ -1039,11 +1010,12 @@ struct radeon_power_state { struct radeon_pm { struct mutex mutex; + /* write locked while reprogramming mclk */ + struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank; bool vblank_sync; - bool gui_idle; 
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; @@ -1192,20 +1164,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); /* ring used for bo copies */ u32 copy_ring_index; } copy; @@ -1467,6 +1439,7 @@ struct radeon_device { struct device *dev; struct drm_device *ddev; struct pci_dev *pdev; + struct rw_semaphore exclusive_lock; /* ASIC */ union radeon_asic_config config; enum radeon_family family; @@ -1512,7 +1485,6 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; - struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool shutdown; @@ -1534,7 +1506,6 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb; @@ -1548,6 +1519,7 @@ struct radeon_device { unsigned debugfs_count; /* virtual memory */ struct radeon_vm_manager vm_manager; + struct mutex gpu_clock_mutex; }; int radeon_device_init(struct radeon_device *rdev, @@ -1748,11 +1720,11 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_pm_finish(rdev) (rdev)->asic->pm.finish((rdev)) #define radeon_pm_init_profile(rdev) (rdev)->asic->pm.init_profile((rdev)) #define radeon_pm_get_dynpm_state(rdev) (rdev)->asic->pm.get_dynpm_state((rdev)) -#define radeon_pre_page_flip(rdev, crtc) rdev->asic->pflip.pre_page_flip((rdev), (crtc)) -#define radeon_page_flip(rdev, crtc, base) rdev->asic->pflip.page_flip((rdev), (crtc), (base)) -#define radeon_post_page_flip(rdev, crtc) rdev->asic->pflip.post_page_flip((rdev), (crtc)) -#define radeon_wait_for_vblank(rdev, crtc) rdev->asic->display.wait_for_vblank((rdev), (crtc)) -#define radeon_mc_wait_for_idle(rdev) rdev->asic->mc_wait_for_idle((rdev)) +#define radeon_pre_page_flip(rdev, crtc) (rdev)->asic->pflip.pre_page_flip((rdev), (crtc)) +#define radeon_page_flip(rdev, crtc, base) (rdev)->asic->pflip.page_flip((rdev), (crtc), (base)) +#define radeon_post_page_flip(rdev, crtc) (rdev)->asic->pflip.post_page_flip((rdev), (crtc)) +#define radeon_wait_for_vblank(rdev, crtc) (rdev)->asic->display.wait_for_vblank((rdev), (crtc)) +#define radeon_mc_wait_for_idle(rdev) (rdev)->asic->mc_wait_for_idle((rdev)) /* Common functions */ /* AGP */ @@ -1785,8 +1757,6 @@ extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -int radeon_vm_manager_start(struct radeon_device *rdev); -int radeon_vm_manager_suspend(struct radeon_device *rdev); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c index f533df5f7d50..973417c4b014 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -40,6 +40,16 @@ /* * Registers accessors functions. */ +/** + * radeon_invalid_rreg - dummy reg read function + * + * @rdev: radeon device pointer + * @reg: offset of register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. + */ static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg) { DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); @@ -47,6 +57,16 @@ static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg) return 0; } +/** + * radeon_invalid_wreg - dummy reg write function + * + * @rdev: radeon device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register write function. Used for register blocks + * that certain asics don't have (all asics). + */ static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", @@ -54,6 +74,14 @@ static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32 BUG_ON(1); } +/** + * radeon_register_accessor_init - sets up the register accessor callbacks + * + * @rdev: radeon device pointer + * + * Sets up the register accessor callbacks for various register + * apertures. Not all asics have all apertures (all asics). + */ static void radeon_register_accessor_init(struct radeon_device *rdev) { rdev->mc_rreg = &radeon_invalid_rreg; @@ -102,6 +130,14 @@ static void radeon_register_accessor_init(struct radeon_device *rdev) /* helper to disable agp */ +/** + * radeon_agp_disable - AGP disable helper function + * + * @rdev: radeon device pointer + * + * Removes AGP flags and changes the gart callbacks on AGP + * cards when using the internal gart rather than AGP (all asics). + */ void radeon_agp_disable(struct radeon_device *rdev) { rdev->flags &= ~RADEON_IS_AGP; @@ -1608,6 +1644,16 @@ static struct radeon_asic si_asic = { }, }; +/** + * radeon_asic_init - register asic specific callbacks + * + * @rdev: radeon device pointer + * + * Registers the appropriate asic specific callbacks for each + * chip family. Also sets other asic specific info like the number + * of crtcs and the register aperture accessors (all asics). + * Returns 0 for success. + */ int radeon_asic_init(struct radeon_device *rdev) { radeon_register_accessor_init(rdev);
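With the table registered by radeon_asic_init(), core code never calls a chip-specific function directly; it dispatches through rdev->asic, and with this series the copy callbacks take a struct radeon_fence ** that the implementation fills in. A minimal sketch of such a dispatch, matching the ->copy member declared in radeon.h above (the wrapper name is illustrative, not part of the patch):

/* sketch: route a buffer copy through the per-family asic table */
static int sketch_asic_copy(struct radeon_device *rdev,
			    uint64_t src_offset, uint64_t dst_offset,
			    unsigned num_gpu_pages,
			    struct radeon_fence **fence)
{
	/* ->copy.copy points at e.g. r600_copy_blit on r6xx parts */
	return rdev->asic->copy.copy(rdev, src_offset, dst_offset,
				     num_gpu_pages, fence);
}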
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e76a941ef14e..18c38d14c8cd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -103,7 +103,6 @@ int r100_pci_gart_enable(struct radeon_device *rdev); void r100_pci_gart_disable(struct radeon_device *rdev); int r100_debugfs_mc_info_init(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); -void r100_ib_fini(struct radeon_device *rdev); int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r100_irq_disable(struct radeon_device *rdev); void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); @@ -144,7 +143,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); void r200_set_safe_registers(struct radeon_device *rdev); /* @@ -256,13 +255,10 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev); * rv515 */ struct rv515_mc_save { - u32 d1vga_control; - u32 d2vga_control; u32 vga_render_control; u32 vga_hdp_control; - u32 d1crtc_control; - u32 d2crtc_control; }; + int rv515_init(struct radeon_device *rdev); void rv515_fini(struct radeon_device *rdev); uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg); @@ -318,7 +314,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence **fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -363,14 +359,16 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, - struct radeon_sa_bo *vb); + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, + struct radeon_sa_bo *vb, struct radeon_semaphore *sem); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, struct radeon_sa_bo *vb); int r600_mc_wait_for_idle(struct radeon_device *rdev); +uint64_t r600_get_gpu_clock(struct radeon_device *rdev); /* * rv770,rv730,rv710,rv740 @@ -389,11 +387,10 @@ void r700_cp_fini(struct radeon_device *rdev); * evergreen */ struct evergreen_mc_save { - u32 vga_control[6]; u32 vga_render_control; u32 vga_hdp_control; - u32 crtc_control[6]; }; + void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev); int evergreen_init(struct radeon_device 
*rdev); void evergreen_fini(struct radeon_device *rdev); @@ -472,5 +469,6 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +uint64_t si_get_gpu_clock(struct radeon_device *rdev); #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index b1e3820df363..d67d4f3eb6f4 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -452,7 +452,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ - if ((dev->pdev->device == 0x9802) && + if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && (dev->pdev->subsystem_vendor == 0x1734) && (dev->pdev->subsystem_device == 0x11bd)) { if (*connector_type == DRM_MODE_CONNECTOR_VGA) { @@ -1263,6 +1263,8 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) union igp_info { struct _ATOM_INTEGRATED_SYSTEM_INFO info; struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; }; bool radeon_atombios_sideport_present(struct radeon_device *rdev) @@ -1390,27 +1392,50 @@ static void radeon_atombios_get_igp_ss_overrides(struct radeon_device *rdev, struct radeon_mode_info *mode_info = &rdev->mode_info; int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); u16 data_offset, size; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 *igp_info; + union igp_info *igp_info; u8 frev, crev; u16 percentage = 0, rate = 0; /* get any igp specific overrides */ if (atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { - igp_info = (struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 *) + igp_info = (union igp_info *) (mode_info->atom_context->bios + data_offset); - switch (id) { - case ASIC_INTERNAL_SS_ON_TMDS: - percentage = le16_to_cpu(igp_info->usDVISSPercentage); - rate = le16_to_cpu(igp_info->usDVISSpreadRateIn10Hz); + switch (crev) { + case 6: + switch (id) { + case ASIC_INTERNAL_SS_ON_TMDS: + percentage = le16_to_cpu(igp_info->info_6.usDVISSPercentage); + rate = le16_to_cpu(igp_info->info_6.usDVISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_HDMI: + percentage = le16_to_cpu(igp_info->info_6.usHDMISSPercentage); + rate = le16_to_cpu(igp_info->info_6.usHDMISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_LVDS: + percentage = le16_to_cpu(igp_info->info_6.usLvdsSSPercentage); + rate = le16_to_cpu(igp_info->info_6.usLvdsSSpreadRateIn10Hz); + break; + } break; - case ASIC_INTERNAL_SS_ON_HDMI: - percentage = le16_to_cpu(igp_info->usHDMISSPercentage); - rate = le16_to_cpu(igp_info->usHDMISSpreadRateIn10Hz); + case 7: + switch (id) { + case ASIC_INTERNAL_SS_ON_TMDS: + percentage = le16_to_cpu(igp_info->info_7.usDVISSPercentage); + rate = le16_to_cpu(igp_info->info_7.usDVISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_HDMI: + percentage = le16_to_cpu(igp_info->info_7.usHDMISSPercentage); + rate = le16_to_cpu(igp_info->info_7.usHDMISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_LVDS: + percentage = le16_to_cpu(igp_info->info_7.usLvdsSSPercentage); + rate = le16_to_cpu(igp_info->info_7.usLvdsSSpreadRateIn10Hz); + break; + } break; - case ASIC_INTERNAL_SS_ON_LVDS: - percentage = le16_to_cpu(igp_info->usLvdsSSPercentage); - rate 
= le16_to_cpu(igp_info->usLvdsSSpreadRateIn10Hz); + default: + DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev); break; } if (percentage) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 98724fcb0088..2a2cf0b88a28 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -30,57 +30,8 @@ static struct radeon_atpx_priv { /* handle for device - and atpx */ acpi_handle dhandle; acpi_handle atpx_handle; - acpi_handle atrm_handle; } radeon_atpx_priv; -/* retrieve the ROM in 4k blocks */ -static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, - int offset, int len) -{ - acpi_status status; - union acpi_object atrm_arg_elements[2], *obj; - struct acpi_object_list atrm_arg; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; - - atrm_arg.count = 2; - atrm_arg.pointer = &atrm_arg_elements[0]; - - atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; - atrm_arg_elements[0].integer.value = offset; - - atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; - atrm_arg_elements[1].integer.value = len; - - status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); - if (ACPI_FAILURE(status)) { - printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); - return -ENODEV; - } - - obj = (union acpi_object *)buffer.pointer; - memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); - len = obj->buffer.length; - kfree(buffer.pointer); - return len; -} - -bool radeon_atrm_supported(struct pci_dev *pdev) -{ - /* get the discrete ROM only via ATRM */ - if (!radeon_atpx_priv.atpx_detected) - return false; - - if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) - return false; - return true; -} - - -int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) -{ - return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len); -} - static int radeon_atpx_get_version(acpi_handle handle) { acpi_status status; @@ -198,7 +149,7 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) { - acpi_handle dhandle, atpx_handle, atrm_handle; + acpi_handle dhandle, atpx_handle; acpi_status status; dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); @@ -209,13 +160,8 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) if (ACPI_FAILURE(status)) return false; - status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); - if (ACPI_FAILURE(status)) - return false; - radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx_handle = atpx_handle; - radeon_atpx_priv.atrm_handle = atrm_handle; return true; } diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 364f5b1a04b9..bedda9caadd9 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,20 +45,14 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA: - r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev)); - if (r) - return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; case RADEON_BENCHMARK_COPY_BLIT: - r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev)); - if (r) - return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; default: DRM_ERROR("Unknown copy method\n"); diff 
--git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 501f4881e5aa..d306cc8fdeaa 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -32,6 +32,7 @@ #include <linux/vga_switcheroo.h> #include <linux/slab.h> +#include <linux/acpi.h> /* * BIOS. */ @@ -98,16 +99,81 @@ static bool radeon_read_bios(struct radeon_device *rdev) return true; } +#ifdef CONFIG_ACPI /* ATRM is used to get the BIOS on the discrete cards in * dual-gpu systems. */ +/* retrieve the ROM in 4k blocks */ +#define ATRM_BIOS_PAGE 4096 +/** + * radeon_atrm_call - fetch a chunk of the vbios + * + * @atrm_handle: acpi ATRM handle + * @bios: vbios image pointer + * @offset: offset of vbios image data to fetch + * @len: length of vbios image data to fetch + * + * Executes ATRM to fetch a chunk of the discrete + * vbios image on PX systems (all asics). + * Returns the length of the buffer fetched. + */ +static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, + int offset, int len) +{ + acpi_status status; + union acpi_object atrm_arg_elements[2], *obj; + struct acpi_object_list atrm_arg; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; + + atrm_arg.count = 2; + atrm_arg.pointer = &atrm_arg_elements[0]; + + atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[0].integer.value = offset; + + atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[1].integer.value = len; + + status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); + if (ACPI_FAILURE(status)) { + printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); + return -ENODEV; + } + + obj = (union acpi_object *)buffer.pointer; + memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); + len = obj->buffer.length; + kfree(buffer.pointer); + return len; +} + static bool radeon_atrm_get_bios(struct radeon_device *rdev) { int ret; int size = 256 * 1024; int i; + struct pci_dev *pdev = NULL; + acpi_handle dhandle, atrm_handle; + acpi_status status; + bool found = false; + + /* ATRM is for the discrete card only */ + if (rdev->flags & RADEON_IS_IGP) + return false; + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!dhandle) + continue; + + status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); + if (!ACPI_FAILURE(status)) { + found = true; + break; + } + } - if (!radeon_atrm_supported(rdev->pdev)) + if (!found) return false; rdev->bios = kmalloc(size, GFP_KERNEL); @@ -117,9 +183,10 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } for (i = 0; i < size / ATRM_BIOS_PAGE; i++) { - ret = radeon_atrm_get_bios_chunk(rdev->bios, - (i * ATRM_BIOS_PAGE), - ATRM_BIOS_PAGE); + ret = radeon_atrm_call(atrm_handle, + rdev->bios, + (i * ATRM_BIOS_PAGE), + ATRM_BIOS_PAGE); if (ret < ATRM_BIOS_PAGE) break; } @@ -130,6 +197,12 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } return true; } +#else +static inline bool radeon_atrm_get_bios(struct radeon_device *rdev) +{ + return false; +} +#endif static bool ni_read_disabled_bios(struct radeon_device *rdev) { @@ -476,6 +549,61 @@ static bool radeon_read_disabled_bios(struct radeon_device *rdev) return legacy_read_disabled_bios(rdev); } +#ifdef CONFIG_ACPI +static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) +{ + bool ret = false; + struct acpi_table_header *hdr; + acpi_size tbl_size; + UEFI_ACPI_VFCT *vfct; + GOP_VBIOS_CONTENT *vbios; + VFCT_IMAGE_HEADER *vhdr; + + if 
(!ACPI_SUCCESS(acpi_get_table_with_size("VFCT", 1, &hdr, &tbl_size))) + return false; + if (tbl_size < sizeof(UEFI_ACPI_VFCT)) { + DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n"); + goto out_unmap; + } + + vfct = (UEFI_ACPI_VFCT *)hdr; + if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) > tbl_size) { + DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n"); + goto out_unmap; + } + + vbios = (GOP_VBIOS_CONTENT *)((char *)hdr + vfct->VBIOSImageOffset); + vhdr = &vbios->VbiosHeader; + DRM_INFO("ACPI VFCT contains a BIOS for %02x:%02x.%d %04x:%04x, size %d\n", + vhdr->PCIBus, vhdr->PCIDevice, vhdr->PCIFunction, + vhdr->VendorID, vhdr->DeviceID, vhdr->ImageLength); + + if (vhdr->PCIBus != rdev->pdev->bus->number || + vhdr->PCIDevice != PCI_SLOT(rdev->pdev->devfn) || + vhdr->PCIFunction != PCI_FUNC(rdev->pdev->devfn) || + vhdr->VendorID != rdev->pdev->vendor || + vhdr->DeviceID != rdev->pdev->device) { + DRM_INFO("ACPI VFCT table is not for this card\n"); + goto out_unmap; + }; + + if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) + vhdr->ImageLength > tbl_size) { + DRM_ERROR("ACPI VFCT image truncated\n"); + goto out_unmap; + } + + rdev->bios = kmemdup(&vbios->VbiosContent, vhdr->ImageLength, GFP_KERNEL); + ret = !!rdev->bios; + +out_unmap: + return ret; +} +#else +static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev) +{ + return false; +} +#endif bool radeon_get_bios(struct radeon_device *rdev) { @@ -484,6 +612,8 @@ bool radeon_get_bios(struct radeon_device *rdev) r = radeon_atrm_get_bios(rdev); if (r == false) + r = radeon_acpi_vfct_bios(rdev); + if (r == false) r = igp_read_bios_from_vram(rdev); if (r == false) r = radeon_read_bios(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 576f4f6919f2..f75247d42ffd 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -719,6 +719,34 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde return i2c; } +static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) +{ + struct drm_device *dev = rdev->ddev; + struct radeon_i2c_bus_rec i2c; + u16 offset; + u8 id, blocks, clk, data; + int i; + + i2c.valid = false; + + offset = combios_get_table_offset(dev, COMBIOS_I2C_INFO_TABLE); + if (offset) { + blocks = RBIOS8(offset + 2); + for (i = 0; i < blocks; i++) { + id = RBIOS8(offset + 3 + (i * 5) + 0); + if (id == 136) { + clk = RBIOS8(offset + 3 + (i * 5) + 3); + data = RBIOS8(offset + 3 + (i * 5) + 4); + /* gpiopad */ + i2c = combios_setup_i2c_bus(rdev, DDC_MONID, + (1 << clk), (1 << data)); + break; + } + } + } + return i2c; +} + void radeon_combios_i2c_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; @@ -755,30 +783,14 @@ void radeon_combios_i2c_init(struct radeon_device *rdev) } else if (rdev->family == CHIP_RS300 || rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) { - u16 offset; - u8 id, blocks, clk, data; - int i; - /* 0x68 */ i2c = combios_setup_i2c_bus(rdev, DDC_CRT2, 0, 0); rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID"); - offset = combios_get_table_offset(dev, COMBIOS_I2C_INFO_TABLE); - if (offset) { - blocks = RBIOS8(offset + 2); - for (i = 0; i < blocks; i++) { - id = RBIOS8(offset + 3 + (i * 5) + 0); - if (id == 136) { - clk = RBIOS8(offset + 3 + (i * 5) + 3); - data = RBIOS8(offset + 3 + (i * 5) + 4); - /* gpiopad */ - i2c = combios_setup_i2c_bus(rdev, DDC_MONID, - (1 << clk), (1 
<< data)); - rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "GPIOPAD_MASK"); - break; - } - } - } + /* gpiopad */ + i2c = radeon_combios_get_i2c_info_from_table(rdev); + if (i2c.valid) + rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "GPIOPAD_MASK"); } else if ((rdev->family == CHIP_R200) || (rdev->family >= CHIP_R300)) { /* 0x68 */ @@ -2321,7 +2333,10 @@ bool radeon_get_legacy_connector_info_from_bios(struct drm_device *dev) connector = (tmp >> 12) & 0xf; ddc_type = (tmp >> 8) & 0xf; - ddc_i2c = combios_setup_i2c_bus(rdev, ddc_type, 0, 0); + if (ddc_type == 5) + ddc_i2c = radeon_combios_get_i2c_info_from_table(rdev); + else + ddc_i2c = combios_setup_i2c_bus(rdev, ddc_type, 0, 0); switch (connector) { case CONNECTOR_PROPRIETARY_LEGACY: diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 2914c5761cfc..895e628b60f8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -64,14 +64,33 @@ void radeon_connector_hotplug(struct drm_connector *connector) /* just deal with DP (not eDP) here. */ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { - int saved_dpms = connector->dpms; - - /* Only turn off the display it it's physically disconnected */ - if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - else if (radeon_dp_needs_link_train(radeon_connector)) - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - connector->dpms = saved_dpms; + struct radeon_connector_atom_dig *dig_connector = + radeon_connector->con_priv; + + /* if existing sink type was not DP no need to retrain */ + if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) + return; + + /* first get sink type as it may be reset after (un)plug */ + dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector); + /* don't do anything if sink is not display port, i.e., + * passive dp->(dvi|hdmi) adaptor + */ + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { + int saved_dpms = connector->dpms; + /* Only turn off the display if it's physically disconnected */ + if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + } else if (radeon_dp_needs_link_train(radeon_connector)) { + /* set it to OFF so that drm_helper_connector_dpms() + * won't return immediately since the current state + * is ON at this point. 
+ */ + connector->dpms = DRM_MODE_DPMS_OFF; + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + } + connector->dpms = saved_dpms; + } } } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 142f89462aa4..b4a0db24f4dd 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; } -static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - bool need_sync = false; - int i, r; + int i; for (i = 0; i < p->nrelocs; i++) { - struct radeon_fence *fence; + struct radeon_fence *a, *b; if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue; - fence = p->relocs[i].robj->tbo.sync_obj; - if (fence->ring != p->ring && !radeon_fence_signaled(fence)) { - sync_to_ring[fence->ring] = true; - need_sync = true; - } - } - - if (!need_sync) { - return 0; - } - - r = radeon_semaphore_create(p->rdev, &p->ib.semaphore); - if (r) { - return r; + a = p->relocs[i].robj->tbo.sync_obj; + b = p->ib.sync_to[a->ring]; + p->ib.sync_to[a->ring] = radeon_fence_later(a, b); } - - return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, - sync_to_ring, p->ring); } /* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -294,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, + struct radeon_fence *fence) +{ + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + struct radeon_bo_list *lobj; + + if (parser->chunk_ib_idx == -1) { + return; + } + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) { + return; + } + + list_for_each_entry(lobj, &parser->validated, tv.head) { + struct radeon_bo_va *bo_va; + struct radeon_bo *rbo = lobj->bo; + + bo_va = radeon_bo_va(rbo, vm); + radeon_fence_unref(&bo_va->fence); + bo_va->fence = radeon_fence_ref(fence); + } +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. 
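Note how radeon_cs_sync_rings() above no longer emits anything on its own: it just folds each reloc's most recent fence into ib.sync_to[] with radeon_fence_later(), leaving the semaphore work to IB scheduling. A sketch of what consuming that array amounts to (hypothetical helper name; radeon_ib_schedule() performs the equivalent internally):

/* sketch: wait once per foreign ring, via semaphores */
static int sketch_sync_ib_rings(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	int i, r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		struct radeon_fence *fence = ib->sync_to[i];

		/* false for NULL, same-ring and already-synced fences */
		if (!radeon_fence_need_sync(fence, ib->ring))
			continue;

		r = radeon_semaphore_sync_rings(rdev, ib->semaphore,
						fence->ring, ib->ring);
		if (r)
			return r;
		radeon_fence_note_sync(fence, ib->ring);
	}
	return 0;
}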
@@ -306,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error) + if (!error) { + /* fence all bo va before ttm_eu_fence_buffer_objects so bos are still reserved */ + radeon_bo_vm_fence_va(parser, parser->ib.fence); ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); - else + } else { ttm_eu_backoff_reservation(&parser->validated); + } if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { @@ -368,16 +379,13 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); parser->ib.vm_id = 0; - r = radeon_ib_schedule(rdev, &parser->ib); + r = radeon_ib_schedule(rdev, &parser->ib, NULL); if (r) { DRM_ERROR("Failed to schedule IB !\n"); } - return 0; + return r; } static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, @@ -407,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->chunk_ib_idx == -1) return 0; - if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0; @@ -459,6 +466,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return r; } + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_vm_bind(rdev, vm); if (r) { @@ -468,30 +476,26 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); + + parser->ib.vm_id = vm->id; + /* ib pool is bound at 0 in virtual address space, + * so gpu_addr is the offset inside the pool bo + */ + parser->ib.gpu_addr = parser->ib.sa_bo->soffset; if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { parser->const_ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space to gpu_addr is the - * offset inside the pool bo + /* ib pool is bound at 0 in virtual address space, + * so gpu_addr is the offset inside the pool bo */ parser->const_ib.gpu_addr = parser->const_ib.sa_bo->soffset; - r = radeon_ib_schedule(rdev, &parser->const_ib); - if (r) - goto out; + r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); + } else { + r = radeon_ib_schedule(rdev, &parser->ib, NULL); } - parser->ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space to gpu_addr is the - * offset inside the pool bo - */ - parser->ib.gpu_addr = parser->ib.sa_bo->soffset; - parser->ib.is_const_ib = false; - r = radeon_ib_schedule(rdev, &parser->ib); out: if (!r) { if (vm->fence) { @@ -499,7 +503,8 @@ out: } vm->fence = radeon_fence_ref(parser->ib.fence); } - mutex_unlock(&fpriv->vm.mutex); + mutex_unlock(&vm->mutex); + mutex_unlock(&rdev->vm_manager.lock); return r; } @@ -519,9 +524,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_parser parser; int r; - radeon_mutex_lock(&rdev->cs_mutex); + down_read(&rdev->exclusive_lock); if (!rdev->accel_working) { - radeon_mutex_unlock(&rdev->cs_mutex); + up_read(&rdev->exclusive_lock); return -EBUSY; } /* initialize parser */ @@ -534,8 +539,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = 
radeon_cs_parser_relocs(&parser); @@ -543,8 +548,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_ib_chunk(rdev, &parser); @@ -557,8 +562,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } out: radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 42acc6449dd6..8794744cdf1a 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -67,7 +67,8 @@ static void radeon_hide_cursor(struct drm_crtc *crtc) if (ASIC_IS_DCE4(rdev)) { WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); - WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | + EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); @@ -94,7 +95,8 @@ static void radeon_show_cursor(struct drm_crtc *crtc) if (ASIC_IS_DCE4(rdev)) { WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | - EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | + EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | @@ -262,8 +264,14 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (!(cursor_end & 0x7f)) w--; } - if (w <= 0) + if (w <= 0) { w = 1; + cursor_end = x - xorigin + w; + if (!(cursor_end & 0x7f)) { + x--; + WARN_ON_ONCE(x < 0); + } + } } } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 066c98b888a5..d2e243867ac6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -96,8 +96,12 @@ static const char radeon_family_name[][16] = { "LAST", }; -/* - * Clear GPU surface registers. +/** + * radeon_surface_init - Clear GPU surface registers. + * + * @rdev: radeon_device pointer + * + * Clear GPU surface registers (r1xx-r5xx). */ void radeon_surface_init(struct radeon_device *rdev) { @@ -119,6 +123,13 @@ void radeon_surface_init(struct radeon_device *rdev) /* * GPU scratch registers helpers function. */ +/** + * radeon_scratch_init - Init scratch register driver information. + * + * @rdev: radeon_device pointer + * + * Init CP scratch register driver information (r1xx-r5xx) + */ void radeon_scratch_init(struct radeon_device *rdev) { int i; @@ -136,6 +147,15 @@ void radeon_scratch_init(struct radeon_device *rdev) } } +/** + * radeon_scratch_get - Allocate a scratch register + * + * @rdev: radeon_device pointer + * @reg: scratch register mmio offset + * + * Allocate a CP scratch register for use by the driver (all asics). + * Returns 0 on success or -EINVAL on failure. 
+ */ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg) { int i; @@ -150,6 +170,14 @@ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg) return -EINVAL; } +/** + * radeon_scratch_free - Free a scratch register + * + * @rdev: radeon_device pointer + * @reg: scratch register mmio offset + * + * Free a CP scratch register allocated for use by the driver (all asics) + */ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) { int i; @@ -162,6 +190,20 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) } } +/* + * radeon_wb_*() + * Writeback is the method by which the GPU updates special pages + * in memory with the status of certain GPU events (fences, ring pointers, + * etc.). + */ + +/** + * radeon_wb_disable - Disable Writeback + * + * @rdev: radeon_device pointer + * + * Disables Writeback (all asics). Used for suspend. + */ void radeon_wb_disable(struct radeon_device *rdev) { int r; @@ -177,6 +219,14 @@ void radeon_wb_disable(struct radeon_device *rdev) rdev->wb.enabled = false; } +/** + * radeon_wb_fini - Disable Writeback and free memory + * + * @rdev: radeon_device pointer + * + * Disables Writeback and frees the Writeback memory (all asics). + * Used at driver shutdown. + */ void radeon_wb_fini(struct radeon_device *rdev) { radeon_wb_disable(rdev); @@ -187,6 +237,15 @@ void radeon_wb_fini(struct radeon_device *rdev) } } +/** + * radeon_wb_init - Init Writeback driver info and allocate memory + * + * @rdev: radeon_device pointer + * + * Initializes Writeback and allocates the Writeback memory (all asics). + * Used at driver startup. + * Returns 0 on success or an -error on failure. + */ int radeon_wb_init(struct radeon_device *rdev) { int r; @@ -355,6 +414,15 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) /* * GPU helpers function. */ +/** + * radeon_card_posted - check if the hw has already been initialized + * + * @rdev: radeon_device pointer + * + * Check if the asic has been initialized (all asics). + * Used at driver startup. + * Returns true if initialized or false if not. + */ bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; @@ -404,6 +472,14 @@ bool radeon_card_posted(struct radeon_device *rdev) } +/** + * radeon_update_bandwidth_info - update display bandwidth params + * + * @rdev: radeon_device pointer + * + * Used when sclk/mclk are switched or display modes are set. + * Params are used to calculate display watermarks (all asics). + */ void radeon_update_bandwidth_info(struct radeon_device *rdev) { fixed20_12 a; @@ -424,6 +500,15 @@ void radeon_update_bandwidth_info(struct radeon_device *rdev) } } +/** + * radeon_boot_test_post_card - check and possibly initialize the hw + * + * @rdev: radeon_device pointer + * + * Check if the asic is initialized and if not, attempt to initialize + * it (all asics). + * Returns true if initialized or false if not. + */ bool radeon_boot_test_post_card(struct radeon_device *rdev) { if (radeon_card_posted(rdev)) @@ -442,6 +527,16 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) } } +/** + * radeon_dummy_page_init - init dummy page used by the driver + * + * @rdev: radeon_device pointer + * + * Allocate the dummy page used by the driver (all asics). + * This dummy page is used by the driver as a filler for gart entries + * when pages are taken out of the GART. + * Returns 0 on success, -ENOMEM on failure. + */ int radeon_dummy_page_init(struct radeon_device *rdev) { if (rdev->dummy_page.page)
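The dummy page doc comment above is worth unpacking: every GART slot must point somewhere, so entries whose backing pages are unbound get aimed at this single zeroed page and stray GPU accesses land harmlessly. Roughly what such an init boils down to (a sketch using the usual page-allocator and PCI DMA mapping calls of this era; error paths trimmed, not quoted from the patch):

/* sketch: one zeroed DMA32 page, mapped once, shared by every
 * unbound GART entry */
rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
if (rdev->dummy_page.page == NULL)
	return -ENOMEM;
rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
				     0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(rdev->pdev, rdev->dummy_page.addr)) {
	__free_page(rdev->dummy_page.page);
	rdev->dummy_page.page = NULL;
	return -ENOMEM;
}
return 0;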
+ */ int radeon_dummy_page_init(struct radeon_device *rdev) { if (rdev->dummy_page.page) @@ -460,6 +555,13 @@ int radeon_dummy_page_init(struct radeon_device *rdev) return 0; } +/** + * radeon_dummy_page_fini - free dummy page used by the driver + * + * @rdev: radeon_device pointer + * + * Frees the dummy page used by the driver (all asics). + */ void radeon_dummy_page_fini(struct radeon_device *rdev) { if (rdev->dummy_page.page == NULL) @@ -472,6 +574,23 @@ void radeon_dummy_page_fini(struct radeon_device *rdev) /* ATOM accessor methods */ +/* + * ATOM is an interpreted byte code stored in tables in the vbios. The + * driver registers callbacks to access registers and the interpreter + * in the driver parses the tables and executes them to program specific + * actions (set display modes, asic init, etc.). See radeon_atombios.c, + * atombios.h, and atom.c. + */ + +/** + * cail_pll_read - read PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + * Returns the value of the PLL register. + */ static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -481,6 +600,15 @@ static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_pll_write - write PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * @val: value to write to the pll register + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + */ static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -488,6 +616,15 @@ static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) rdev->pll_wreg(rdev, reg, val); } +/** + * cail_mc_read - read MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * + * Provides an MC register accessor for the atom interpreter (r4xx+). + * Returns the value of the MC register. + */ static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -497,6 +634,15 @@ static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_mc_write - write MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * @val: value to write to the MC register + * + * Provides an MC register accessor for the atom interpreter (r4xx+). + */ static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -504,6 +650,15 @@ static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) rdev->mc_wreg(rdev, reg, val); } +/** + * cail_reg_write - write MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * @val: value to write to the MMIO register + * + * Provides an MMIO register accessor for the atom interpreter (r4xx+). + */ static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -511,6 +666,15 @@ static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) WREG32(reg*4, val); } +/** + * cail_reg_read - read MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * + * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register. + */ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -520,6 +684,15 @@ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_ioreg_write - write IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * @val: value to write to the IO register + * + * Provides an IO register accessor for the atom interpreter (r4xx+). + */ static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -527,6 +700,15 @@ static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) WREG32_IO(reg*4, val); } +/** + * cail_ioreg_read - read IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * + * Provides an IO register accessor for the atom interpreter (r4xx+). + * Returns the value of the IO register. + */ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -536,6 +718,16 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) return r; } +/** + * radeon_atombios_init - init the driver info and callbacks for atombios + * + * @rdev: radeon_device pointer + * + * Initializes the driver info and register access callbacks for the + * ATOM interpreter (r4xx+). + * Returns 0 on success, -ENOMEM on failure. + * Called at driver startup. + */ int radeon_atombios_init(struct radeon_device *rdev) { struct card_info *atom_card_info = @@ -569,6 +761,15 @@ int radeon_atombios_init(struct radeon_device *rdev) return 0; } +/** + * radeon_atombios_fini - free the driver info and callbacks for atombios + * + * @rdev: radeon_device pointer + * + * Frees the driver info and register access callbacks for the ATOM + * interpreter (r4xx+). + * Called at driver shutdown. + */ void radeon_atombios_fini(struct radeon_device *rdev) { if (rdev->mode_info.atom_context) { @@ -578,17 +779,50 @@ void radeon_atombios_fini(struct radeon_device *rdev) kfree(rdev->mode_info.atom_card_info); } +/* COMBIOS */ +/* + * COMBIOS is the bios format prior to ATOM. It provides + * command tables similar to ATOM, but doesn't have a unified + * parser. See radeon_combios.c. + */ + +/** + * radeon_combios_init - init the driver info for combios + * + * @rdev: radeon_device pointer + * + * Initializes the driver info for combios (r1xx-r3xx). + * Returns 0 on success. + * Called at driver startup. + */ int radeon_combios_init(struct radeon_device *rdev) { radeon_combios_initialize_bios_scratch_regs(rdev->ddev); return 0; } +/** + * radeon_combios_fini - free the driver info for combios + * + * @rdev: radeon_device pointer + * + * Frees the driver info for combios (r1xx-r3xx). + * Called at driver shutdown. + */ void radeon_combios_fini(struct radeon_device *rdev) { } -/* if we get transitioned to only one device, tak VGA back */ +/* if we get transitioned to only one device, take VGA back */ +/** + * radeon_vga_set_decode - enable/disable vga decode + * + * @cookie: radeon_device pointer + * @state: enable/disable vga decode + * + * Enable/disable vga decode (all asics). + * Returns VGA resource flags.
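+ *
+ * For reference (illustrative, based on how the driver is believed to
+ * wire this up): the callback is registered with the VGA arbiter during
+ * device init, roughly:
+ *
+ *   vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);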
+ */ static unsigned int radeon_vga_set_decode(void *cookie, bool state) { struct radeon_device *rdev = cookie; @@ -600,6 +834,14 @@ static unsigned int radeon_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } +/** + * radeon_check_arguments - validate module params + * + * @rdev: radeon_device pointer + * + * Validates certain module parameters and updates + * the associated values used by the driver (all asics). + */ void radeon_check_arguments(struct radeon_device *rdev) { /* vramlimit must be a power of two */ @@ -666,6 +908,15 @@ void radeon_check_arguments(struct radeon_device *rdev) } } +/** + * radeon_switcheroo_set_state - set switcheroo state + * + * @pdev: pci dev pointer + * @state: vga switcheroo state + * + * Callback for the switcheroo driver. Suspends or resumes the + * asic before or after it is powered up using ACPI methods. + */ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -686,6 +937,15 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero } } +/** + * radeon_switcheroo_can_switch - see if switcheroo state can change + * + * @pdev: pci dev pointer + * + * Callback for the switcheroo driver. Check if the switcheroo + * state can be changed. + * Returns true if the state can be changed, false if not. + */ static bool radeon_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -703,6 +963,18 @@ static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = { .can_switch = radeon_switcheroo_can_switch, }; +/** + * radeon_device_init - initialize the driver + * + * @rdev: radeon_device pointer + * @ddev: drm dev pointer + * @pdev: pci dev pointer + * @flags: driver flags + * + * Initializes the driver info and hw (all asics). + * Returns 0 for success or an error on failure. + * Called at driver startup.
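+ *
+ * Illustrative call site (sketch, not part of this patch): the KMS load
+ * path allocates the device and hands it here together with the drm and
+ * pci devices, roughly:
+ *
+ *   rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL);
+ *   r = radeon_device_init(rdev, dev, dev->pdev, flags);
+ *   if (r)
+ *       ... unwind and free rdev ...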
+ */ int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, @@ -721,6 +993,10 @@ int radeon_device_init(struct radeon_device *rdev, rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT; rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; rdev->accel_working = false; + /* set up ring ids */ + for (i = 0; i < RADEON_NUM_RINGS; i++) { + rdev->ring[i].idx = i; + } DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, @@ -728,20 +1004,21 @@ int radeon_device_init(struct radeon_device *rdev, /* mutex initialization are all done here so we * can recall function without having locking issues */ - radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); - if (rdev->family >= CHIP_R600) - spin_lock_init(&rdev->ih.lock); + atomic_set(&rdev->ih.lock, 0); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); - mutex_init(&rdev->vram_mutex); + mutex_init(&rdev->gpu_clock_mutex); + init_rwsem(&rdev->pm.mclk_lock); + init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev); if (r) return r; /* initialize vm here */ + mutex_init(&rdev->vm_manager.lock); rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); @@ -822,6 +1099,10 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + if (rdev->flags & RADEON_IS_AGP && !rdev->accel_working) { /* Acceleration not working on AGP card try again * with fallback to PCI or PCIE GART @@ -847,6 +1128,14 @@ int radeon_device_init(struct radeon_device *rdev, static void radeon_debugfs_remove_files(struct radeon_device *rdev); +/** + * radeon_device_fini - tear down the driver + * + * @rdev: radeon_device pointer + * + * Tear down the driver info (all asics). + * Called at driver shutdown. + */ void radeon_device_fini(struct radeon_device *rdev) { DRM_INFO("radeon: finishing device.\n"); @@ -868,6 +1157,16 @@ void radeon_device_fini(struct radeon_device *rdev) /* * Suspend & resume. */ +/** + * radeon_suspend_kms - initiate device suspend + * + * @dev: drm dev pointer + * @state: suspend state + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) { struct radeon_device *rdev; @@ -942,10 +1241,20 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) return 0; } +/** + * radeon_resume_kms - initiate device resume + * + * @dev: drm dev pointer + * + * Bring the hw back to operating state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver resume.
+ */ int radeon_resume_kms(struct drm_device *dev) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -960,6 +1269,11 @@ int radeon_resume_kms(struct drm_device *dev) /* resume AGP if in use */ radeon_agp_resume(rdev); radeon_resume(rdev); + + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + radeon_pm_resume(rdev); radeon_restore_bios_scratch_regs(rdev); @@ -984,30 +1298,77 @@ int radeon_resume_kms(struct drm_device *dev) return 0; } +/** + * radeon_gpu_reset - reset the asic + * + * @rdev: radeon device pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Returns 0 for success or an error on failure. + */ int radeon_gpu_reset(struct radeon_device *rdev) { - int r; + unsigned ring_sizes[RADEON_NUM_RINGS]; + uint32_t *ring_data[RADEON_NUM_RINGS]; + + bool saved = false; + + int i, r; int resched; + down_write(&rdev->exclusive_lock); radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); radeon_suspend(rdev); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i], + &ring_data[i]); + if (ring_sizes[i]) { + saved = true; + dev_info(rdev->dev, "Saved %d dwords of commands " + "on ring %d.\n", ring_sizes[i], i); + } + } + +retry: r = radeon_asic_reset(rdev); if (!r) { - dev_info(rdev->dev, "GPU reset succeed\n"); + dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n"); radeon_resume(rdev); - radeon_restore_bios_scratch_regs(rdev); - drm_helper_resume_force_mode(rdev->ddev); - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); } + radeon_restore_bios_scratch_regs(rdev); + drm_helper_resume_force_mode(rdev->ddev); + + if (!r) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_ring_restore(rdev, &rdev->ring[i], + ring_sizes[i], ring_data[i]); + } + + r = radeon_ib_ring_tests(rdev); + if (r) { + dev_err(rdev->dev, "ib ring test failed (%d).\n", r); + if (saved) { + radeon_suspend(rdev); + goto retry; + } + } + } else { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + kfree(ring_data[i]); + } + } + + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ?
*/ dev_info(rdev->dev, "GPU reset failed\n"); } + up_write(&rdev->exclusive_lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 64a008d14493..7ddef8f30d0e 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1401,7 +1401,7 @@ void radeon_modeset_fini(struct radeon_device *rdev) radeon_i2c_fini(rdev); } -static bool is_hdtv_mode(struct drm_display_mode *mode) +static bool is_hdtv_mode(const struct drm_display_mode *mode) { /* try and guess if this is a tv or a monitor */ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */ @@ -1414,7 +1414,7 @@ static bool is_hdtv_mode(struct drm_display_mode *mode) } bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 2c4d53fd20c5..27d22d709c90 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -59,9 +59,13 @@ * 2.15.0 - add max_pipes query * 2.16.0 - fix evergreen 2D tiled surface calculation * 2.17.0 - add STRMOUT_BASE_UPDATE for r7xx + * 2.18.0 - r600-eg: allow "invalid" DB formats + * 2.19.0 - r600-eg: MSAA textures + * 2.20.0 - r600-si: RADEON_INFO_TIMESTAMP query + * 2.21.0 - r600-r700: FMASK and CMASK */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 17 +#define KMS_DRIVER_MINOR 21 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); @@ -133,7 +137,7 @@ int radeon_tv = 1; int radeon_audio = 0; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; -int radeon_pcie_gen2 = 0; +int radeon_pcie_gen2 = -1; int radeon_msi = -1; int radeon_lockup_timeout = 10000; @@ -179,7 +183,7 @@ module_param_named(disp_priority, radeon_disp_priority, int, 0444); MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)"); module_param_named(hw_i2c, radeon_hw_i2c, int, 0444); -MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (1 = enable)"); +MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); @@ -262,7 +266,6 @@ static struct drm_driver driver_old = { .irq_postinstall = radeon_driver_irq_postinstall, .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, - .reclaim_buffers = drm_core_reclaim_buffers, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = &radeon_driver_old_fops, @@ -365,7 +368,6 @@ static struct drm_driver kms_driver = { .irq_postinstall = radeon_driver_irq_postinstall_kms, .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, - .reclaim_buffers = drm_core_reclaim_buffers, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 11f5f402d22c..7b737b9339ad 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -40,39 +40,95 @@ #include "radeon.h" #include "radeon_trace.h" +/* + * Fences + * Fences mark an event in the GPU's pipeline and are used + * for GPU/CPU synchronization.
When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. Whether + * we use a scratch register or memory location depends on the asic + * and whether writeback is enabled. + */ + +/** + * radeon_fence_write - write a fence value + * + * @rdev: radeon_device pointer + * @seq: sequence number to write + * @ring: ring index the fence is associated with + * + * Writes a fence value to memory or a scratch register (all asics). + */ static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring) { - if (rdev->wb.enabled) { - *rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq); + struct radeon_fence_driver *drv = &rdev->fence_drv[ring]; + if (likely(rdev->wb.enabled || !drv->scratch_reg)) { + *drv->cpu_addr = cpu_to_le32(seq); } else { - WREG32(rdev->fence_drv[ring].scratch_reg, seq); + WREG32(drv->scratch_reg, seq); } } +/** + * radeon_fence_read - read a fence value + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Reads a fence value from memory or a scratch register (all asics). + * Returns the value of the fence read from memory or register. + */ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) { + struct radeon_fence_driver *drv = &rdev->fence_drv[ring]; u32 seq = 0; - if (rdev->wb.enabled) { - seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr); + if (likely(rdev->wb.enabled || !drv->scratch_reg)) { + seq = le32_to_cpu(*drv->cpu_addr); } else { - seq = RREG32(rdev->fence_drv[ring].scratch_reg); + seq = RREG32(drv->scratch_reg); } return seq; } -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) +/** + * radeon_fence_emit - emit a fence on the requested ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * @ring: ring index the fence is associated with + * + * Emits a fence command on the requested ring (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +int radeon_fence_emit(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) { /* we are protected by the ring emission mutex */ - if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { - return 0; + *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); + if ((*fence) == NULL) { + return -ENOMEM; } - fence->seq = ++rdev->fence_drv[fence->ring].seq; - radeon_fence_ring_emit(rdev, fence->ring, fence); - trace_radeon_fence_emit(rdev->ddev, fence->seq); + kref_init(&((*fence)->kref)); + (*fence)->rdev = rdev; + (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; + (*fence)->ring = ring; + radeon_fence_ring_emit(rdev, ring, *fence); + trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); return 0; } +/** + * radeon_fence_process - process a fence + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Checks the current fence value and wakes the fence queue + * if the sequence number has increased (all asics). + */ void radeon_fence_process(struct radeon_device *rdev, int ring) { uint64_t seq, last_seq; @@ -133,30 +189,35 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) } } +/** + * radeon_fence_destroy - destroy a fence + * + * @kref: fence kref + * + * Frees the fence object (all asics).
+ */ static void radeon_fence_destroy(struct kref *kref) { struct radeon_fence *fence; fence = container_of(kref, struct radeon_fence, kref); - fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence); } -int radeon_fence_create(struct radeon_device *rdev, - struct radeon_fence **fence, - int ring) -{ - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); - if ((*fence) == NULL) { - return -ENOMEM; - } - kref_init(&((*fence)->kref)); - (*fence)->rdev = rdev; - (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; - (*fence)->ring = ring; - return 0; -} - +/** + * radeon_fence_seq_signaled - check if a fence sequence number has signaled + * + * @rdev: radeon device pointer + * @seq: sequence number + * @ring: ring index the fence is associated with + * + * Check if the last signaled fence sequence number is >= the requested + * sequence number (all asics). + * Returns true if the fence has signaled (current fence value + * is >= requested value) or false if it has not (current fence + * value is < the requested value). Helper function for + * radeon_fence_signaled(). + */ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -171,15 +232,19 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } +/** + * radeon_fence_signaled - check if a fence has signaled + * + * @fence: radeon fence object + * + * Check if the requested fence has signaled (all asics). + * Returns true if the fence has signaled or false if it has not. + */ bool radeon_fence_signaled(struct radeon_fence *fence) { if (!fence) { return true; } - if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { - WARN(1, "Querying an unemitted fence : %p !\n", fence); - return true; - } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; } @@ -190,6 +255,24 @@ bool radeon_fence_signaled(struct radeon_fence *fence) return false; } +/** + * radeon_fence_wait_seq - wait for a specific sequence number + * + * @rdev: radeon device pointer + * @target_seq: sequence number we want to wait for + * @ring: ring index the fence is associated with + * @intr: use interruptible sleep + * @lock_ring: whether the ring should be locked or not + * + * Wait for the requested sequence number to be written (all asics). + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the sequence number. Helper function + * for radeon_fence_wait(), et al. + * Returns 0 if the sequence number has passed, error for all other cases. + * -EDEADLK is returned when a GPU lockup has been detected and the ring is + * marked as not ready so no further jobs get scheduled until a successful + * reset. + */ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq, unsigned ring, bool intr, bool lock_ring) { @@ -285,6 +368,17 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq, return 0; } +/** + * radeon_fence_wait - wait for a fence to signal + * + * @fence: radeon fence object + * @intr: use interruptible sleep + * + * Wait for the requested fence to signal (all asics). + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the fence. + * Returns 0 if the fence has passed, error for all other cases.
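+ *
+ * Putting the pieces above together (illustrative sketch only; real
+ * callers hold the ring emission lock when emitting):
+ *
+ *   struct radeon_fence *fence;
+ *   r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+ *   if (!r) {
+ *       r = radeon_fence_wait(fence, false);
+ *       radeon_fence_unref(&fence);
+ *   }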
+ */ int radeon_fence_wait(struct radeon_fence *fence, bool intr) { int r; @@ -315,6 +409,20 @@ bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) return false; } +/** + * radeon_fence_wait_any_seq - wait for a sequence number on any ring + * + * @rdev: radeon device pointer + * @target_seq: sequence number(s) we want to wait for + * @intr: use interruptible sleep + * + * Wait for the requested sequence number(s) to be written by any ring + * (all asics). Sequence number array is indexed by ring id. + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the sequence number. Helper function + * for radeon_fence_wait_any(), et al. + * Returns 0 if the sequence number has passed, error for all other cases. + */ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, u64 *target_seq, bool intr) { @@ -343,7 +451,7 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, /* nothing to wait for ? */ if (ring == RADEON_NUM_RINGS) { - return 0; + return -ENOENT; } while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { @@ -424,6 +532,19 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, return 0; } +/** + * radeon_fence_wait_any - wait for a fence to signal on any ring + * + * @rdev: radeon device pointer + * @fences: radeon fence object(s) + * @intr: use interruptible sleep + * + * Wait for any requested fence to signal (all asics). Fence + * array is indexed by ring id. @intr selects whether to use + * interruptible (true) or non-interruptible (false) sleep when + * waiting for the fences. Used by the suballocator. + * Returns 0 if any fence has passed, error for all other cases. + */ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr) @@ -444,9 +565,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; } - if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { - seq[i] = fences[i]->seq; - } + seq[i] = fences[i]->seq; } r = radeon_fence_wait_any_seq(rdev, seq, intr); @@ -456,16 +575,22 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; } +/** + * radeon_fence_wait_next_locked - wait for the next fence to signal + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Wait for the next fence on the requested ring to signal (all asics). + * Returns 0 if the next fence has passed, error for all other cases. + * Caller must hold ring lock. + */ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) { uint64_t seq; - /* We are not protected by ring lock when reading current seq but - * it's ok as worst case is we return to early while we could have - * wait. - */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; - if (seq >= rdev->fence_drv[ring].seq) { + if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT; @@ -473,23 +598,59 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) return radeon_fence_wait_seq(rdev, seq, ring, false, false); } -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) +/** + * radeon_fence_wait_empty_locked - wait for all fences to signal + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Wait for all fences on the requested ring to signal (all asics). + * Caller must hold ring lock.
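+ *
+ * For example (sketch, not part of this patch), the suspend path drains
+ * every ring while holding the ring lock:
+ *
+ *   mutex_lock(&rdev->ring_lock);
+ *   for (i = 0; i < RADEON_NUM_RINGS; i++)
+ *       radeon_fence_wait_empty_locked(rdev, i);
+ *   mutex_unlock(&rdev->ring_lock);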
+ */ void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) { - /* We are not protected by ring lock when reading current seq - * but it's ok as wait empty is call from place where no more - * activity can be scheduled so there won't be concurrent access - * to seq value. - */ - return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, - ring, false, false); + uint64_t seq = rdev->fence_drv[ring].sync_seq[ring]; + + while (1) { + int r; + r = radeon_fence_wait_seq(rdev, seq, ring, false, false); + if (r == -EDEADLK) { + mutex_unlock(&rdev->ring_lock); + r = radeon_gpu_reset(rdev); + mutex_lock(&rdev->ring_lock); + if (!r) + continue; + } + if (r) { + dev_err(rdev->dev, "error waiting for ring to become" + " idle (%d)\n", r); + } + return; + } } +/** + * radeon_fence_ref - take a ref on a fence + * + * @fence: radeon fence object + * + * Take a reference on a fence (all asics). + * Returns the fence. + */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { kref_get(&fence->kref); return fence; } +/** + * radeon_fence_unref - remove a ref on a fence + * + * @fence: radeon fence object + * + * Remove a reference on a fence (all asics). + */ void radeon_fence_unref(struct radeon_fence **fence) { struct radeon_fence *tmp = *fence; @@ -500,6 +661,16 @@ void radeon_fence_unref(struct radeon_fence **fence) } } +/** + * radeon_fence_count_emitted - get the count of emitted fences + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Get the number of fences emitted on the requested ring (all asics). + * Returns the number of emitted fences on the ring. Used by the + * dynpm code to track ring activity. + */ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) { uint64_t emitted; @@ -508,7 +679,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring); - emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); + emitted = rdev->fence_drv[ring].sync_seq[ring] + - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000; @@ -516,6 +688,83 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; } +/** + * radeon_fence_need_sync - do we need a semaphore + * + * @fence: radeon fence object + * @dst_ring: which ring to check against + * + * Check if the fence needs to be synced against another ring + * (all asics). If so, we need to emit a semaphore. + * Returns true if we need to sync with another ring, false if + * not. + */ +bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *fdrv; + + if (!fence) { + return false; + } + + if (fence->ring == dst_ring) { + return false; + } + + /* we are protected by the ring mutex */ + fdrv = &fence->rdev->fence_drv[dst_ring]; + if (fence->seq <= fdrv->sync_seq[fence->ring]) { + return false; + } + + return true; +} + +/** + * radeon_fence_note_sync - record the sync point + * + * @fence: radeon fence object + * @dst_ring: which ring to check against + * + * Note the sequence number at which point the fence will + * be synced with the requested ring (all asics).
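+ *
+ * Together with radeon_fence_need_sync() this gives the usual pattern
+ * (illustrative sketch; the semaphore plumbing is elided):
+ *
+ *   if (radeon_fence_need_sync(fence, dst_ring)) {
+ *       ... emit a semaphore signal on fence->ring and a matching
+ *       wait on dst_ring (e.g. radeon_semaphore_emit_signal/_wait) ...
+ *       radeon_fence_note_sync(fence, dst_ring);
+ *   }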
+ */ +void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *dst, *src; + unsigned i; + + if (!fence) { + return; + } + + if (fence->ring == dst_ring) { + return; + } + + /* we are protected by the ring mutex */ + src = &fence->rdev->fence_drv[fence->ring]; + dst = &fence->rdev->fence_drv[dst_ring]; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (i == dst_ring) { + continue; + } + dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); + } +} + +/** + * radeon_fence_driver_start_ring - make the fence driver + * ready for use on the requested ring. + * + * @rdev: radeon device pointer + * @ring: ring index to start the fence driver on + * + * Make the fence driver ready for processing (all asics). + * Not all asics have all rings, so each asic will only + * start the fence driver on the rings it has. + * Returns 0 for success, errors for failure. + */ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -537,24 +786,49 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; - radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); + radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); return 0; } +/** + * radeon_fence_driver_init_ring - init the fence driver + * for the requested ring. + * + * @rdev: radeon device pointer + * @ring: ring index to start the fence driver on + * + * Init the fence driver for the requested ring (all asics). + * Helper function for radeon_fence_driver_init(). + */ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) { + int i; + rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; - rdev->fence_drv[ring].seq = 0; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false; } +/** + * radeon_fence_driver_init - init the fence driver + * for all possible rings. + * + * @rdev: radeon device pointer + * + * Init the fence driver for all possible rings (all asics). + * Not all asics have all rings, so each asic will only + * start the fence driver on the rings it has using + * radeon_fence_driver_start_ring(). + * Returns 0 for success. + */ int radeon_fence_driver_init(struct radeon_device *rdev) { int ring; @@ -569,6 +843,14 @@ int radeon_fence_driver_init(struct radeon_device *rdev) return 0; } +/** + * radeon_fence_driver_fini - tear down the fence driver + * for all possible rings. + * + * @rdev: radeon device pointer + * + * Tear down the fence driver for all possible rings (all asics). 
+ */ void radeon_fence_driver_fini(struct radeon_device *rdev) { int ring; @@ -595,7 +877,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; - int i; + int i, j; for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized) @@ -604,8 +886,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", - rdev->fence_drv[i].seq); + seq_printf(m, "Last emitted 0x%016llx\n", + rdev->fence_drv[i].sync_seq[i]); + + for (j = 0; j < RADEON_NUM_RINGS; ++j) { + if (i != j && rdev->fence_drv[j].initialized) + seq_printf(m, "Last sync to ring %d 0x%016llx\n", + j, rdev->fence_drv[i].sync_seq[j]); + } } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 84b648a7ddd8..bb3b7fe05ccd 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -31,8 +31,38 @@ #include "radeon_reg.h" /* + * GART + * The GART (Graphics Aperture Remapping Table) is an aperture + * in the GPU's address space. System pages can be mapped into + * the aperture and look like contiguous pages from the GPU's + * perspective. A page table maps the pages in the aperture + * to the actual backing pages in system memory. + * + * Radeon GPUs support both an internal GART, as described above, + * and AGP. AGP works similarly, but the GART table is configured + * and maintained by the northbridge rather than the driver. + * Radeon hw has a separate AGP aperture that is programmed to + * point to the AGP aperture provided by the northbridge and the + * requests are passed through to the northbridge aperture. + * Both AGP and internal GART can be used at the same time, however + * that is not currently supported by the driver. + * + * This file handles the common internal GART management. + */ + +/* * Common GART table functions. */ +/** + * radeon_gart_table_ram_alloc - allocate system ram for gart page table + * + * @rdev: radeon_device pointer + * + * Allocate system memory for GART page table + * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the + * gart table to be in system memory. + * Returns 0 for success, -ENOMEM for failure. + */ int radeon_gart_table_ram_alloc(struct radeon_device *rdev) { void *ptr; @@ -54,6 +84,15 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_table_ram_free - free system ram for gart page table + * + * @rdev: radeon_device pointer + * + * Free system memory for GART page table + * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the + * gart table to be in system memory. + */ void radeon_gart_table_ram_free(struct radeon_device *rdev) { if (rdev->gart.ptr == NULL) { @@ -73,6 +112,16 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev) rdev->gart.table_addr = 0; } +/** + * radeon_gart_table_vram_alloc - allocate vram for gart page table + * + * @rdev: radeon_device pointer + * + * Allocate video memory for GART page table + * (pcie r4xx, r5xx+). These asics require the + * gart table to be in video memory. + * Returns 0 for success, error for failure. 
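+ *
+ * Illustrative pairing (editor's sketch): the table is allocated once at
+ * init time and (re)pinned at startup/resume before GART is enabled:
+ *
+ *   r = radeon_gart_table_vram_alloc(rdev);
+ *   ...
+ *   r = radeon_gart_table_vram_pin(rdev);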
+ */ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) { int r; @@ -88,6 +137,16 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_table_vram_pin - pin gart page table in vram + * + * @rdev: radeon_device pointer + * + * Pin the GART page table in vram so it will not be moved + * by the memory manager (pcie r4xx, r5xx+). These asics require the + * gart table to be in video memory. + * Returns 0 for success, error for failure. + */ int radeon_gart_table_vram_pin(struct radeon_device *rdev) { uint64_t gpu_addr; @@ -110,6 +169,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev) return r; } +/** + * radeon_gart_table_vram_unpin - unpin gart page table in vram + * + * @rdev: radeon_device pointer + * + * Unpin the GART page table in vram (pcie r4xx, r5xx+). + * These asics require the gart table to be in video memory. + */ void radeon_gart_table_vram_unpin(struct radeon_device *rdev) { int r; @@ -126,6 +193,15 @@ void radeon_gart_table_vram_unpin(struct radeon_device *rdev) } } +/** + * radeon_gart_table_vram_free - free gart page table vram + * + * @rdev: radeon_device pointer + * + * Free the video memory used for the GART page table + * (pcie r4xx, r5xx+). These asics require the gart table to + * be in video memory. + */ void radeon_gart_table_vram_free(struct radeon_device *rdev) { if (rdev->gart.robj == NULL) { @@ -135,12 +211,19 @@ void radeon_gart_table_vram_free(struct radeon_device *rdev) radeon_bo_unref(&rdev->gart.robj); } - - - /* * Common gart functions. */ +/** + * radeon_gart_unbind - unbind pages from the gart page table + * + * @rdev: radeon_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to unbind + * + * Unbinds the requested pages from the gart page table and + * replaces them with the dummy page (all asics). + */ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, int pages) { @@ -172,6 +255,19 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, radeon_gart_tlb_flush(rdev); } +/** + * radeon_gart_bind - bind pages into the gart page table + * + * @rdev: radeon_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to bind + * @pagelist: pages to bind + * @dma_addr: DMA addresses of pages + * + * Binds the requested pages to the gart page table + * (all asics). + * Returns 0 for success, -EINVAL for failure. + */ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int pages, struct page **pagelist, dma_addr_t *dma_addr) { @@ -203,6 +299,14 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, return 0; } +/** + * radeon_gart_restore - bind all pages in the gart page table + * + * @rdev: radeon_device pointer + * + * Binds all pages in the gart page table (all asics). + * Used to rebuild the gart table on device startup or resume. + */ void radeon_gart_restore(struct radeon_device *rdev) { int i, j, t; @@ -222,6 +326,14 @@ void radeon_gart_restore(struct radeon_device *rdev) radeon_gart_tlb_flush(rdev); } +/** + * radeon_gart_init - init the driver info for managing the gart + * + * @rdev: radeon_device pointer + * + * Allocate the dummy page and init the gart driver info (all asics). + * Returns 0 for success, error for failure. 
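+ *
+ * Once the table is ready, callers populate it with the bind/unbind
+ * helpers above, roughly (illustrative only; the local variable names
+ * are made up):
+ *
+ *   r = radeon_gart_bind(rdev, gtt_offset, num_pages, pages, dma_addrs);
+ *   ...
+ *   radeon_gart_unbind(rdev, gtt_offset, num_pages);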
+ */ int radeon_gart_init(struct radeon_device *rdev) { int r, i; @@ -262,6 +374,13 @@ int radeon_gart_init(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_fini - tear down the driver info for managing the gart + * + * @rdev: radeon_device pointer + * + * Tear down the gart driver info and free the dummy page (all asics). + */ void radeon_gart_fini(struct radeon_device *rdev) { if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) { @@ -278,35 +397,104 @@ void radeon_gart_fini(struct radeon_device *rdev) } /* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix of + * vram pages and system memory pages, and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When executing a command buffer, + * the kernel tells the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their page tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/* * vm helpers * * TODO bind a default page at vm initialization for default address */ + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ int radeon_vm_manager_init(struct radeon_device *rdev) { + struct radeon_vm *vm; + struct radeon_bo_va *bo_va; int r; - rdev->vm_manager.enabled = false; + if (!rdev->vm_manager.enabled) { + /* mark first vm as always in use, it's the system one */ + /* allocate enough for 2 full VM pts */ + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + rdev->vm_manager.max_pfn * 8 * 2, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } - /* mark first vm as always in use, it's the system one */ - /* allocate enough for 2 full VM pts */ - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - rdev->vm_manager.max_pfn * 8 * 2, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; + r = rdev->vm_manager.funcs->init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + + r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); + if (r) + return r; } - r = rdev->vm_manager.funcs->init(rdev); - if (r == 0) - rdev->vm_manager.enabled = true; + /* restore page table */ + list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { + if (vm->id == -1) + continue; - return r; + list_for_each_entry(bo_va, &vm->va, vm_list) { + struct ttm_mem_reg *mem = NULL; + if (bo_va->valid) + mem = &bo_va->bo->tbo.mem; + + bo_va->valid = false; + r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem); + if (r) { + DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); + } + } + + r = rdev->vm_manager.funcs->bind(rdev, vm, vm->id); + if (r) { + DRM_ERROR("Failed to bind vm %d!\n", vm->id); + } + } + return 0; } -/* cs mutex must be lock */ +/*
global mutex must be locked */ +/** + * radeon_vm_unbind_locked - unbind a specific vm + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Unbind the requested vm (cayman+). + * Wait for use of the VM to finish, then unbind the page table, + * and free the page table memory. + */ static void radeon_vm_unbind_locked(struct radeon_device *rdev, struct radeon_vm *vm) { @@ -317,10 +505,21 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, } /* wait for vm use to end */ - if (vm->fence) { - radeon_fence_wait(vm->fence, false); - radeon_fence_unref(&vm->fence); + while (vm->fence) { + int r; + r = radeon_fence_wait(vm->fence, false); + if (r) + DRM_ERROR("error while waiting for fence: %d\n", r); + if (r == -EDEADLK) { + mutex_unlock(&rdev->vm_manager.lock); + r = radeon_gpu_reset(rdev); + mutex_lock(&rdev->vm_manager.lock); + if (!r) + continue; + } + break; } + radeon_fence_unref(&vm->fence); /* hw unbind */ rdev->vm_manager.funcs->unbind(rdev, vm); @@ -335,39 +534,42 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, } } +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ void radeon_vm_manager_fini(struct radeon_device *rdev) { - if (rdev->vm_manager.sa_manager.bo == NULL) - return; - radeon_vm_manager_suspend(rdev); - rdev->vm_manager.funcs->fini(rdev); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -int radeon_vm_manager_start(struct radeon_device *rdev) -{ - if (rdev->vm_manager.sa_manager.bo == NULL) { - return -EINVAL; - } - return radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); -} - -int radeon_vm_manager_suspend(struct radeon_device *rdev) -{ struct radeon_vm *vm, *tmp; - radeon_mutex_lock(&rdev->cs_mutex); + if (!rdev->vm_manager.enabled) + return; + + mutex_lock(&rdev->vm_manager.lock); /* unbind all active vm */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } rdev->vm_manager.funcs->fini(rdev); - radeon_mutex_unlock(&rdev->cs_mutex); - return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + mutex_unlock(&rdev->vm_manager.lock); + + radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; } -/* cs mutex must be lock */ +/* global mutex must be locked */ +/** + * radeon_vm_unbind - locked version of unbind + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Locked version that wraps radeon_vm_unbind_locked (cayman+). + */ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) { mutex_lock(&vm->mutex); @@ -375,7 +577,19 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) mutex_unlock(&vm->mutex); } -/* cs mutex must be lock & vm mutex must be lock */ +/* global and local mutex must be locked */ +/** + * radeon_vm_bind - bind a page table to a VMID + * + * @rdev: radeon_device pointer + * @vm: vm to bind + * + * Bind the requested vm (cayman+). + * Suballocate memory for the page table, allocate a VMID + * and bind the page table to it, and finally start to populate + * the page table. + * Returns 0 for success, error for failure.
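+ *
+ * Illustrative use (sketch; the CS path is believed to do roughly this
+ * before flushing a VM):
+ *
+ *   mutex_lock(&rdev->vm_manager.lock);
+ *   mutex_lock(&vm->mutex);
+ *   r = radeon_vm_bind(rdev, vm);
+ *   ...
+ *   mutex_unlock(&vm->mutex);
+ *   mutex_unlock(&rdev->vm_manager.lock);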
+ */ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_vm *vm_evict; @@ -438,6 +652,20 @@ retry_id: } /* object have to be reserved */ +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @offset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm and validate + * the offset requested within the vm address space. + * Returns 0 for success, error for failure. + */ int radeon_vm_bo_add(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, @@ -479,7 +707,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* release mutex and lock in right order */ mutex_unlock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); /* and check again */ if (last_pfn > vm->last_pfn) { @@ -488,7 +716,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, radeon_vm_unbind_locked(rdev, vm); vm->last_pfn = (last_pfn + align) & ~align; } - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); } head = &vm->va; last_offset = 0; @@ -515,6 +743,17 @@ int radeon_vm_bo_add(struct radeon_device *rdev, return 0; } +/** + * radeon_vm_get_addr - get the physical address of the page + * + * @rdev: radeon_device pointer + * @mem: ttm mem + * @pfn: pfn + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ static u64 radeon_vm_get_addr(struct radeon_device *rdev, struct ttm_mem_reg *mem, unsigned pfn) @@ -543,7 +782,18 @@ static u64 radeon_vm_get_addr(struct radeon_device *rdev, return addr; } -/* object have to be reserved & cs mutex took & vm mutex took */ +/* object have to be reserved & global and local mutex must be locked */ +/** + * radeon_vm_bo_update_pte - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + */ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, @@ -564,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; } - if (bo_va->valid) + if (bo_va->valid && mem) return 0; ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -592,20 +842,48 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } /* object have to be reserved */ +/** + * radeon_vm_bo_rmv - remove a bo from a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Remove @bo from the requested vm (cayman+). + * Remove @bo from the list of bos associated with the vm and + * remove the ptes for @bo in the page table. + * Returns 0 for success.
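+ *
+ * The bo must be reserved by the caller, as in the gem close path in
+ * this patch:
+ *
+ *   if (radeon_bo_reserve(bo, false) == 0) {
+ *       radeon_vm_bo_rmv(rdev, vm, bo);
+ *       radeon_bo_unreserve(bo);
+ *   }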
+ */ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r; bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) return 0; - radeon_mutex_lock(&rdev->cs_mutex); + /* wait for va use to end */ + while (bo_va->fence) { + r = radeon_fence_wait(bo_va->fence, false); + if (r) { + DRM_ERROR("error while waiting for fence: %d\n", r); + } + if (r == -EDEADLK) { + r = radeon_gpu_reset(rdev); + if (!r) + continue; + } + break; + } + radeon_fence_unref(&bo_va->fence); + + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); @@ -614,6 +892,15 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, return 0; } +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ void radeon_vm_bo_invalidate(struct radeon_device *rdev, struct radeon_bo *bo) { @@ -625,6 +912,17 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, } } +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm (cayman+). + * Map the IB pool and any other shared objects into the VM + * by default as it's used by all VMs. + * Returns 0 for success, error for failure. + */ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { int r; @@ -651,22 +949,34 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) return r; } +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). 
+ * Unbind the VM and remove all bos from the vm bo list. + */ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va, *tmp; int r; - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); - /* remove all bo */ + /* remove all bo; at this point none are busy any more because unbind + * waited for the last vm fence to signal + */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (!r) { bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); list_del_init(&bo_va->bo_list); list_del_init(&bo_va->vm_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); kfree(bo_va); } @@ -678,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(bo_va->bo); kfree(bo_va); } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 21ec9f5653ce..1b57b0058ad6 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -134,36 +134,25 @@ void radeon_gem_object_close(struct drm_gem_object *obj, struct radeon_device *rdev = rbo->rdev; struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_vm *vm = &fpriv->vm; - struct radeon_bo_va *bo_va, *tmp; if (rdev->family < CHIP_CAYMAN) { return; } if (radeon_bo_reserve(rbo, false)) { + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); return; } - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { - if (bo_va->vm == vm) { - /* remove from this vm address space */ - mutex_lock(&vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); - } - } + radeon_vm_bo_rmv(rdev, vm, rbo); radeon_bo_unreserve(rbo); } static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) { if (r == -EDEADLK) { - radeon_mutex_lock(&rdev->cs_mutex); r = radeon_gpu_reset(rdev); if (!r) r = -EAGAIN; - radeon_mutex_unlock(&rdev->cs_mutex); } return r; } @@ -217,12 +206,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle; int r; + down_read(&rdev->exclusive_lock); /* create a gem object to contain this object in */ args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, args->initial_domain, false, false, &gobj); if (r) { + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } @@ -230,10 +221,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) { + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } args->handle = handle; + up_read(&rdev->exclusive_lock); return 0; } @@ -242,6 +235,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, { /* transition the BO to a domain - * just validate the BO into a certain domain */ + struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -249,10 +243,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, /* for now if someone requests domain CPU - * just make sure the buffer is finished with */ +
down_read(&rdev->exclusive_lock); /* just do a BO wait for now */ gobj = drm_gem_object_lookup(dev, filp, args->handle); if (gobj == NULL) { + up_read(&rdev->exclusive_lock); return -ENOENT; } robj = gem_to_radeon_bo(gobj); @@ -260,6 +256,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_unreference_unlocked(gobj); + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(robj->rdev, r); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1aba06..afaa1727abd2 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,17 @@ #include "radeon.h" #include "atom.h" +#define RADEON_WAIT_IDLE_TIMEOUT 200 + +/** + * radeon_driver_irq_handler_kms - irq handler for KMS + * + * @DRM_IRQ_ARGS: args + * + * This is the irq handler for the radeon KMS driver (all asics). + * radeon_irq_process is a macro that points to the per-asic + * irq handler callback. + */ irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -43,6 +54,17 @@ irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) /* * Handle hotplug events outside the interrupt handler proper. */ +/** + * radeon_hotplug_work_func - display hotplug work handler + * + * @work: work struct + * + * This is the hot plug event work handler (all asics). + * The work gets scheduled from the irq handler if there + * was a hot plug interrupt. It walks the connector table + * and calls the hotplug handler for each one, then sends + * a drm hotplug event to alert userspace. + */ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, @@ -59,61 +81,94 @@ static void radeon_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } +/** + * radeon_driver_irq_preinstall_kms - drm irq preinstall callback + * + * @dev: drm dev pointer + * + * Gets the hw ready to enable irqs (all asics). + * This function disables all interrupt sources on the GPU. + */ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i; + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev); } +/** + * radeon_driver_irq_postinstall_kms - drm irq postinstall callback + * + * @dev: drm dev pointer + * + * Handles stuff to be done after enabling irqs (all asics). + * Returns 0 on success.
+ */ int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { - struct radeon_device *rdev = dev->dev_private; - unsigned i; - dev->max_vblank_count = 0x001fffff; - for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = true; - radeon_irq_set(rdev); return 0; } +/** + * radeon_driver_irq_uninstall_kms - drm irq uninstall callback + * + * @dev: drm dev pointer + * + * This function disables all interrupt sources on the GPU (all asics). + */ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i; if (rdev == NULL) { return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } +/** + * radeon_msi_ok - asic specific msi checks + * + * @rdev: radeon device pointer + * + * Handles asic specific MSI checks to determine if + * MSIs should be enabled on a particular chip (all asics). + * Returns true if MSIs should be enabled, false if MSIs + * should not be enabled. + */ static bool radeon_msi_ok(struct radeon_device *rdev) { /* RV370/RV380 was first asic with MSI support */ @@ -166,17 +221,22 @@ static bool radeon_msi_ok(struct radeon_device *rdev) return true; } +/** + * radeon_irq_kms_init - init driver interrupt info + * + * @rdev: radeon device pointer + * + * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics). + * Returns 0 for success, error for failure. + */ int radeon_irq_kms_init(struct radeon_device *rdev) { - int i; int r = 0; INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - spin_lock_init(&rdev->irq.sw_lock); - for (i = 0; i < rdev->num_crtc; i++) - spin_lock_init(&rdev->irq.pflip_lock[i]); + spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; } @@ -201,6 +261,13 @@ int radeon_irq_kms_init(struct radeon_device *rdev) return 0; } +/** + * radeon_irq_kms_fini - tear down driver interrupt info + * + * @rdev: radeon device pointer + * + * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics). + */ void radeon_irq_kms_fini(struct radeon_device *rdev) { drm_vblank_cleanup(rdev->ddev); @@ -213,31 +280,63 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) flush_work_sync(&rdev->hotplug_work); } +/** + * radeon_irq_kms_sw_irq_get - enable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to enable + * + * Enables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring.
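The sw_irq_get/put pair that follows replaces the old sw_lock/sw_refcount bookkeeping with a bare atomic counter; only the 0-to-1 and 1-to-0 transitions take rdev->irq.lock and reprogram the hardware. A stripped-down sketch of the idiom, with illustrative names:

static atomic_t example_users = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(example_lock);   /* serializes HW programming only */

static void example_irq_get(void)
{
        unsigned long flags;

        if (atomic_inc_return(&example_users) == 1) {
                spin_lock_irqsave(&example_lock, flags);
                /* first user: set the interrupt enable bits */
                spin_unlock_irqrestore(&example_lock, flags);
        }
}

static void example_irq_put(void)
{
        unsigned long flags;

        if (atomic_dec_and_test(&example_users)) {
                spin_lock_irqsave(&example_lock, flags);
                /* last user: clear the interrupt enable bits */
                spin_unlock_irqrestore(&example_lock, flags);
        }
}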
+ */ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags; - spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { - rdev->irq.sw_int[ring] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } +/** + * radeon_irq_kms_sw_irq_put - disable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to disable + * + * Disables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring. + */ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags; - spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { - rdev->irq.sw_int[ring] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } +/** + * radeon_irq_kms_pflip_irq_get - enable pageflip interrupt + * + * @rdev: radeon device pointer + * @crtc: crtc whose interrupt you want to enable + * + * Enables the pageflip interrupt for a specific crtc (all asics). + * For pageflips we use the vblank interrupt source. + */ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) { unsigned long irqflags; @@ -245,14 +344,25 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { - rdev->irq.pflip[crtc] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } +/** + * radeon_irq_kms_pflip_irq_put - disable pageflip interrupt + * + * @rdev: radeon device pointer + * @crtc: crtc whose interrupt you want to disable + * + * Disables the pageflip interrupt for a specific crtc (all asics). + * For pageflips we use the vblank interrupt source. 
+ */ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) { unsigned long irqflags; @@ -260,12 +370,121 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { - rdev->irq.pflip[crtc] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } +/** + * radeon_irq_kms_enable_afmt - enable audio format change interrupt + * + * @rdev: radeon device pointer + * @block: afmt block whose interrupt you want to enable + * + * Enables the afmt change interrupt for a specific afmt block (all asics). + */ +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + +} + +/** + * radeon_irq_kms_disable_afmt - disable audio format change interrupt + * + * @rdev: radeon device pointer + * @block: afmt block whose interrupt you want to disable + * + * Disables the afmt change interrupt for a specific afmt block (all asics). + */ +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_enable_hpd - enable hotplug detect interrupt + * + * @rdev: radeon device pointer + * @hpd_mask: mask of hpd pins you want to enable. + * + * Enables the hotplug detect interrupt for a specific hpd pin (all asics). + */ +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_disable_hpd - disable hotplug detect interrupt + * + * @rdev: radeon device pointer + * @hpd_mask: mask of hpd pins you want to disable. + * + * Disables the hotplug detect interrupt for a specific hpd pin (all asics). + */ +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] &= !(hpd_mask & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_wait_gui_idle - waits for drawing engine to be idle + * + * @rdev: radeon device pointer + * + * Enables the GUI idle interrupt and waits for it to fire (r6xx+). + * This is currently used to make sure the 3D engine is idle for power + * management, but should be replaced with proper fence waits. + * GUI idle interrupts don't work very well on pre-r6xx hw and it also + * does not take into account other aspects of the chip that may be busy. + * DO NOT USE GOING FORWARD.
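For context, the one caller this series leaves standing is the reclocking path in radeon_pm.c (further down in this patch), which uses the helper roughly as below. Since it returns the wait_event_timeout() result, a zero return means the idle interrupt never fired within RADEON_WAIT_IDLE_TIMEOUT:

        /* wait for the 3D engine to drain before touching clocks */
        if (rdev->family >= CHIP_R600 && rdev->irq.installed)
                radeon_irq_kms_wait_gui_idle(rdev);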
+ */ +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{ + unsigned long irqflags; + int r; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + + r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev), + msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 5c58d7d90cb2..414b4acf6947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -29,10 +29,22 @@ #include "drm_sarea.h" #include "radeon.h" #include "radeon_drm.h" +#include "radeon_asic.h" #include <linux/vga_switcheroo.h> #include <linux/slab.h> +/** + * radeon_driver_unload_kms - Main unload function for KMS. + * + * @dev: drm dev pointer + * + * This is the main unload function for KMS (all asics). + * It calls radeon_modeset_fini() to tear down the + * displays, and radeon_device_fini() to tear down + * the rest of the device (CP, writeback, etc.). + * Returns 0 on success. + */ int radeon_driver_unload_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -46,6 +58,19 @@ int radeon_driver_unload_kms(struct drm_device *dev) return 0; } +/** + * radeon_driver_load_kms - Main load function for KMS. + * + * @dev: drm dev pointer + * @flags: device flags + * + * This is the main load function for KMS (all asics). + * It calls radeon_device_init() to set up the non-display + * parts of the chip (asic init, CP, writeback, etc.), and + * radeon_modeset_init() to set up the display parts + * (crtcs, encoders, hotplug detect, etc.). + * Returns 0 on success, error on failure. + */ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct radeon_device *rdev; @@ -96,6 +121,16 @@ out: return r; } +/** + * radeon_set_filp_rights - Set filp right. + * + * @dev: drm dev pointer + * @owner: drm file + * @applier: drm file + * @value: value + * + * Sets the filp rights for the device (all asics). + */ static void radeon_set_filp_rights(struct drm_device *dev, struct drm_file **owner, struct drm_file *applier, @@ -118,20 +153,54 @@ static void radeon_set_filp_rights(struct drm_device *dev, /* * Userspace get information ioctl */ +/** + * radeon_info_ioctl - answer a device specific request. + * + * @rdev: radeon device pointer + * @data: request object + * @filp: drm filp + * + * This function is used to pass device specific parameters to the userspace + * drivers. Examples include: pci device id, pipeline parms, tiling params, + * etc. (all asics). + * Returns 0 on success, -EINVAL on failure. + */ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct radeon_device *rdev = dev->dev_private; - struct drm_radeon_info *info; + struct drm_radeon_info *info = data; struct radeon_mode_info *minfo = &rdev->mode_info; - uint32_t *value_ptr; - uint32_t value; + uint32_t value, *value_ptr; + uint64_t value64, *value_ptr64; struct drm_crtc *crtc; int i, found; - info = data; + /* TIMESTAMP is a 64-bit value, needs special handling. 
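On the userspace side the new request goes through the usual libdrm wrapper; a hedged sketch, assuming the RADEON_INFO_TIMESTAMP define and the struct drm_radeon_info layout from this kernel's radeon_drm.h:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

/* Read the free-running GPU clock; r6xx+ only, per the kernel check. */
static int radeon_query_timestamp(int fd, uint64_t *ts)
{
        struct drm_radeon_info info;

        memset(&info, 0, sizeof(info));
        info.request = RADEON_INFO_TIMESTAMP;
        info.value = (uintptr_t)ts;     /* kernel copies the u64 through this pointer */
        return drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
}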
*/ + if (info->request == RADEON_INFO_TIMESTAMP) { + if (rdev->family >= CHIP_R600) { + value_ptr64 = (uint64_t*)((unsigned long)info->value); + if (rdev->family >= CHIP_TAHITI) { + value64 = si_get_gpu_clock(rdev); + } else { + value64 = r600_get_gpu_clock(rdev); + } + + if (DRM_COPY_TO_USER(value_ptr64, &value64, sizeof(value64))) { + DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__); + return -EFAULT; + } + return 0; + } else { + DRM_DEBUG_KMS("timestamp is r6xx+ only!\n"); + return -EINVAL; + } + } + value_ptr = (uint32_t *)((unsigned long)info->value); - if (DRM_COPY_FROM_USER(&value, value_ptr, sizeof(value))) + if (DRM_COPY_FROM_USER(&value, value_ptr, sizeof(value))) { + DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__); return -EFAULT; + } switch (info->request) { case RADEON_INFO_DEVICE_ID: @@ -291,7 +360,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } if (DRM_COPY_TO_USER(value_ptr, &value, sizeof(uint32_t))) { - DRM_ERROR("copy_to_user\n"); + DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__); return -EFAULT; } return 0; @@ -301,16 +370,40 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /* * Outdated mess for old drm with Xorg being in charge (void function now). */ +/** + * radeon_driver_firstopen_kms - drm callback for first open + * + * @dev: drm dev pointer + * + * Nothing to be done for KMS (all asics). + * Returns 0 on success. + */ int radeon_driver_firstopen_kms(struct drm_device *dev) { return 0; } +/** + * radeon_driver_lastclose_kms - drm callback for last close + * + * @dev: drm dev pointer + * + * Switch vga switcheroo state after last close (all asics). + */ void radeon_driver_lastclose_kms(struct drm_device *dev) { vga_switcheroo_process_delayed_switch(); } +/** + * radeon_driver_open_kms - drm callback for open + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device open, init vm on cayman+ (all asics). + * Returns 0 on success, error on failure. + */ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct radeon_device *rdev = dev->dev_private; @@ -339,6 +432,14 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return 0; } +/** + * radeon_driver_postclose_kms - drm callback for post close + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device post close, tear down vm on cayman+ (all asics). + */ void radeon_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv) { @@ -354,6 +455,15 @@ void radeon_driver_postclose_kms(struct drm_device *dev, } } +/** + * radeon_driver_preclose_kms - drm callback for pre close + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx + * (all asics). + */ void radeon_driver_preclose_kms(struct drm_device *dev, struct drm_file *file_priv) { @@ -367,6 +477,15 @@ void radeon_driver_preclose_kms(struct drm_device *dev, /* * VBlank related functions. */ +/** + * radeon_get_vblank_counter_kms - get frame count + * + * @dev: drm dev pointer + * @crtc: crtc to get the frame count from + * + * Gets the frame count on the requested crtc (all asics). + * Returns frame count on success, -EINVAL on failure.
+ */ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; @@ -379,34 +498,70 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) return radeon_get_vblank_counter(rdev, crtc); } +/** + * radeon_enable_vblank_kms - enable vblank interrupt + * + * @dev: drm dev pointer + * @crtc: crtc to enable vblank interrupt for + * + * Enable the interrupt on the requested crtc (all asics). + * Returns 0 on success, -EINVAL on failure. + */ int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; + int r; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = true; - - return radeon_irq_set(rdev); + r = radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; } +/** + * radeon_disable_vblank_kms - disable vblank interrupt + * + * @dev: drm dev pointer + * @crtc: crtc to disable vblank interrupt for + * + * Disable the interrupt on the requested crtc (all asics). + */ void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = false; - radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } +/** + * radeon_get_vblank_timestamp_kms - get vblank timestamp + * + * @dev: drm dev pointer + * @crtc: crtc to get the timestamp for + * @max_error: max error + * @vblank_time: time value + * @flags: flags passed to the driver + * + * Gets the timestamp on the requested crtc based on the + * scanout position (all asics). + * Returns positive status flags on success, negative error on failure. + */ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, int *max_error, struct timeval *vblank_time, diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 210317c7045e..94b4a1c12893 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -990,7 +990,7 @@ static void radeon_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) } static bool radeon_crtc_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) @@ -1025,9 +1025,11 @@ static int radeon_crtc_mode_set(struct drm_crtc *crtc, static void radeon_crtc_prepare(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_crtc *crtci; + radeon_crtc->in_mode_set = true; /* * The hardware wedges sometimes if you reconfigure one CRTC * whilst another is running (see fdo bug #24611).
@@ -1038,6 +1040,7 @@ static void radeon_crtc_prepare(struct drm_crtc *crtc) static void radeon_crtc_commit(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_crtc *crtci; @@ -1048,6 +1051,7 @@ static void radeon_crtc_commit(struct drm_crtc *crtc) if (crtci->enabled) radeon_crtc_dpms(crtci, DRM_MODE_DPMS_ON); } + radeon_crtc->in_mode_set = false; } static const struct drm_crtc_helper_funcs legacy_helper_funcs = { diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index a0c82229e8f0..670e9910f869 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -244,7 +244,7 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder, } static bool radeon_legacy_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 5b10ffd7bb2f..d56978949f34 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -275,6 +275,7 @@ struct radeon_crtc { u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; + bool in_mode_set; uint32_t crtc_offset; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; @@ -488,7 +489,7 @@ extern void radeon_connector_hotplug(struct drm_connector *connector); extern int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct drm_display_mode *mode); extern void radeon_dp_set_link_config(struct drm_connector *connector, - struct drm_display_mode *mode); + const struct drm_display_mode *mode); extern void radeon_dp_link_train(struct drm_encoder *encoder, struct drm_connector *connector); extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector); @@ -678,7 +679,7 @@ void radeon_enc_destroy(struct drm_encoder *encoder); void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); void radeon_combios_asic_init(struct drm_device *dev); bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); void radeon_panel_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 830f1a7b486f..9024e7222839 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { /* remove from all vm address space */ - mutex_lock(&bo_va->vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); } } @@ -115,9 +111,7 @@ int radeon_bo_create(struct radeon_device *rdev, size = ALIGN(size, PAGE_SIZE); - if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; - } + rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; if (kernel) { type = ttm_bo_type_kernel; } else if (sg) { @@ -154,11 +148,11 @@ retry: INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are 
uninterruptible */ - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) { @@ -219,9 +213,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); ttm_bo_unref(&tbo); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (tbo == NULL) *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 5b37e283ec38..7ae606600107 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200 static const char *radeon_pm_state_type_name[5] = { "Default", @@ -251,21 +250,14 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return; mutex_lock(&rdev->ddev->struct_mutex); - mutex_lock(&rdev->vram_mutex); + down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock); /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) { - /* wait for GPU idle */ - rdev->pm.gui_idle = false; - rdev->irq.gui_idle = true; - radeon_irq_set(rdev); - wait_event_interruptible_timeout( - rdev->irq.idle_queue, rdev->pm.gui_idle, - msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); - rdev->irq.gui_idle = false; - radeon_irq_set(rdev); + /* wait for GPU to become idle */ + radeon_irq_kms_wait_gui_idle(rdev); } } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -303,7 +295,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE; mutex_unlock(&rdev->ring_lock); - mutex_unlock(&rdev->vram_mutex); + up_write(&rdev->pm.mclk_lock); mutex_unlock(&rdev->ddev->struct_mutex); } diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 983658c91358..43c431a2686d 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -35,47 +35,97 @@ #include "atom.h" /* - * IB. + * IB + * IBs (Indirect Buffers) are areas of GPU accessible memory where + * commands are stored. You can put a pointer to the IB in the + * command ring and the hw will fetch the commands from the IB + * and execute them. Generally userspace acceleration drivers + * produce command buffers which are sent to the kernel and + * put in IBs for execution by the requested ring. */ int radeon_debugfs_sa_init(struct radeon_device *rdev); +/** + * radeon_ib_get - request an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ring: ring index the IB is associated with + * @ib: IB object returned + * @size: requested IB size + * + * Request an IB (all asics). IBs are allocated using the + * suballocator. + * Returns 0 on success, error on failure.
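Taken together, the reworked IB API makes a typical submission look roughly like this; ring index, packet contents and error handling are illustrative only:

        struct radeon_ib ib;
        int r;

        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, 64);
        if (r)
                return r;

        ib.ptr[0] = 0x80000000;         /* placeholder NOP dword */
        ib.length_dw = 1;
        /* radeon_ib_get() cleared ib.sync_to[]; fill in fences from
         * other rings this work depends on, if any */

        r = radeon_ib_schedule(rdev, &ib, NULL); /* no const IB here */
        if (!r)
                r = radeon_fence_wait(ib.fence, false);
        radeon_ib_free(rdev, &ib);
        return r;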
+ */ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) { - int r; + int i, r; r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; } - r = radeon_fence_create(rdev, &ib->fence, ring); + + r = radeon_semaphore_create(rdev, &ib->semaphore); if (r) { - dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r); - radeon_sa_bo_free(rdev, &ib->sa_bo, NULL); return r; } + ib->ring = ring; + ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; ib->is_const_ib = false; - ib->semaphore = NULL; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + ib->sync_to[i] = NULL; return 0; } +/** + * radeon_ib_free - free an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ib: IB object to free + * + * Free an IB (all asics). + */ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, ib->semaphore, ib->fence); + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } -int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) +/** + * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * @const_ib: Const IB to schedule (SI only) + * + * Schedule an IB on the associated ring (all asics). + * Returns 0 on success, error on failure. + * + * On SI, there are two parallel engines fed from the primary ring, + * the CE (Constant Engine) and the DE (Drawing Engine). Since + * resource descriptors have moved to memory, the CE allows you to + * prime the caches while the DE is updating register state so that + * the resource descriptors will be already in cache when the draw is + * processed. To accomplish this, the userspace driver submits two + * IBs, one for the CE and one for the DE. If there is a CE IB (called + * a CONST_IB), it will be put on the ring prior to the DE IB. Prior + * to SI there was just a DE IB. + */ +int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; - int r = 0; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + bool need_sync = false; + int i, r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. 
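The CS ioctl is the caller that actually exercises the const_ib parameter; sketched below under the assumption that the parser embeds both IBs the way this series restructures it (parser here is a hypothetical radeon_cs_parser):

        if (rdev->family >= CHIP_TAHITI && parser->const_ib.length_dw) {
                /* CE IB goes on the ring first; one fence covers both */
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        }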
*/ @@ -84,17 +134,51 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) } /* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - radeon_ring_ib_execute(rdev, ib->fence->ring, ib); - radeon_fence_emit(rdev, ib->fence); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = ib->sync_to[i]; + if (radeon_fence_need_sync(fence, ib->ring)) { + need_sync = true; + radeon_semaphore_sync_rings(rdev, ib->semaphore, + fence->ring, ib->ring); + radeon_fence_note_sync(fence, ib->ring); + } + } + /* immediately free semaphore when we don't need to sync */ + if (!need_sync) { + radeon_semaphore_free(rdev, &ib->semaphore, NULL); + } + if (const_ib) { + radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); + radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); + } + radeon_ring_ib_execute(rdev, ib->ring, ib); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } + if (const_ib) { + const_ib->fence = radeon_fence_ref(ib->fence); + } radeon_ring_unlock_commit(rdev, ring); return 0; } +/** + * radeon_ib_pool_init - Init the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Initialize the suballocator to manage a pool of memory + * for use as IBs (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ib_pool_init(struct radeon_device *rdev) { int r; @@ -108,6 +192,12 @@ int radeon_ib_pool_init(struct radeon_device *rdev) if (r) { return r; } + + r = radeon_sa_bo_manager_start(rdev, &rdev->ring_tmp_bo); + if (r) { + return r; + } + rdev->ib_pool_ready = true; if (radeon_debugfs_sa_init(rdev)) { dev_err(rdev->dev, "failed to register debugfs file for SA\n"); @@ -115,24 +205,33 @@ int radeon_ib_pool_init(struct radeon_device *rdev) return 0; } +/** + * radeon_ib_pool_fini - Free the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Tear down the suballocator managing the pool of memory + * for use as IBs (all asics). + */ void radeon_ib_pool_fini(struct radeon_device *rdev) { if (rdev->ib_pool_ready) { + radeon_sa_bo_manager_suspend(rdev, &rdev->ring_tmp_bo); radeon_sa_bo_manager_fini(rdev, &rdev->ring_tmp_bo); rdev->ib_pool_ready = false; } } -int radeon_ib_pool_start(struct radeon_device *rdev) -{ - return radeon_sa_bo_manager_start(rdev, &rdev->ring_tmp_bo); -} - -int radeon_ib_pool_suspend(struct radeon_device *rdev) -{ - return radeon_sa_bo_manager_suspend(rdev, &rdev->ring_tmp_bo); -} - +/** + * radeon_ib_ring_tests - test IBs on the rings + * + * @rdev: radeon_device pointer + * + * Test an IB (Indirect Buffer) on each ring. + * If the test fails, disable the ring. + * Returns 0 on success, error if the primary GFX ring + * IB test fails. + */ int radeon_ib_ring_tests(struct radeon_device *rdev) { unsigned i; @@ -164,10 +263,28 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) } /* - * Ring. + * Rings + * Most engines on the GPU are fed via ring buffers. Ring + * buffers are areas of GPU accessible memory that the host + * writes commands into and the GPU reads commands out of. + * There is a rptr (read pointer) that determines where the + * GPU is currently reading, and a wptr (write pointer) + * which determines where the host has written. When the + * pointers are equal, the ring is idle. 
When the host + * writes commands to the ring buffer, it increments the + * wptr. The GPU then starts fetching commands and executes + * them until the pointers are equal again. */ int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring); +/** + * radeon_ring_write - write a value to the ring + * + * @ring: radeon_ring structure holding ring information + * @v: dword (dw) value to write + * + * Write a value to the requested ring buffer (all asics). + */ void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { #if DRM_DEBUG_CODE @@ -181,21 +298,37 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v) ring->ring_free_dw--; } -int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring) +/** + * radeon_ring_supports_scratch_reg - check if the ring supports + * writing to scratch registers + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if a specific ring supports writing to scratch registers (all asics). + * Returns true if the ring supports writing to scratch regs, false if not. + */ +bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, + struct radeon_ring *ring) { - /* r1xx-r5xx only has CP ring */ - if (rdev->family < CHIP_R600) - return RADEON_RING_TYPE_GFX_INDEX; - - if (rdev->family >= CHIP_CAYMAN) { - if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]) - return CAYMAN_RING_TYPE_CP1_INDEX; - else if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]) - return CAYMAN_RING_TYPE_CP2_INDEX; + switch (ring->idx) { + case RADEON_RING_TYPE_GFX_INDEX: + case CAYMAN_RING_TYPE_CP1_INDEX: + case CAYMAN_RING_TYPE_CP2_INDEX: + return true; + default: + return false; } - return RADEON_RING_TYPE_GFX_INDEX; } +/** + * radeon_ring_free_size - update the free size + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Update the free dw slots in the ring buffer (all asics). + */ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) { u32 rptr; @@ -214,7 +347,16 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) } } - +/** + * radeon_ring_alloc - allocate space on the ring buffer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Allocate @ndw dwords in the ring buffer (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; @@ -227,7 +369,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi if (ndw < ring->ring_free_dw) { break; } - r = radeon_fence_wait_next_locked(rdev, radeon_ring_index(rdev, ring)); + r = radeon_fence_wait_next_locked(rdev, ring->idx); if (r) return r; } @@ -236,6 +378,17 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi return 0; } +/** + * radeon_ring_lock - lock the ring and allocate space on it + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Lock the ring and allocate @ndw dwords in the ring buffer + * (all asics). + * Returns 0 on success, error on failure. 
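The canonical produce-and-kick sequence built from these helpers, for reference (a sketch; real callers emit actual packets rather than NOPs):

        r = radeon_ring_lock(rdev, ring, 16);
        if (r)
                return r;
        radeon_ring_write(ring, ring->nop);     /* emit packets here */
        radeon_ring_unlock_commit(rdev, ring);  /* pad, bump wptr, kick the HW */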
+ */ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; @@ -249,15 +402,20 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig return 0; } +/** + * radeon_ring_commit - tell the GPU to execute the new + * commands on the ring buffer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Update the wptr (write pointer) to tell the GPU to + * execute new commands on the ring buffer (all asics). + */ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) { - unsigned count_dw_pad; - unsigned i; - /* We pad to match fetch size */ - count_dw_pad = (ring->align_mask + 1) - - (ring->wptr & ring->align_mask); - for (i = 0; i < count_dw_pad; i++) { + while (ring->wptr & ring->align_mask) { radeon_ring_write(ring, ring->nop); } DRM_MEMORYBARRIER(); @@ -265,23 +423,55 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) (void)RREG32(ring->wptr_reg); } +/** + * radeon_ring_unlock_commit - tell the GPU to execute the new + * commands on the ring buffer and unlock it + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Call radeon_ring_commit() then unlock the ring (all asics). + */ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring) { radeon_ring_commit(rdev, ring); mutex_unlock(&rdev->ring_lock); } +/** + * radeon_ring_undo - reset the wptr + * + * @ring: radeon_ring structure holding ring information + * + * Reset the driver's copy of the wptr (all asics). + */ void radeon_ring_undo(struct radeon_ring *ring) { ring->wptr = ring->wptr_old; } +/** + * radeon_ring_unlock_undo - reset the wptr and unlock the ring + * + * @ring: radeon_ring structure holding ring information + * + * Call radeon_ring_undo() then unlock the ring (all asics). + */ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring) { radeon_ring_undo(ring); mutex_unlock(&rdev->ring_lock); } +/** + * radeon_ring_force_activity - add some nop packets to the ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Add some nop packets to the ring to force activity (all asics). + * Used for lockup detection to see if the rptr is advancing. + */ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring) { int r; @@ -296,6 +486,13 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring * } } +/** + * radeon_ring_lockup_update - update lockup variables + * + * @ring: radeon_ring structure holding ring information + * + * Update the last rptr value and timestamp (all asics). + */ void radeon_ring_lockup_update(struct radeon_ring *ring) { ring->last_rptr = ring->rptr; @@ -349,6 +546,116 @@ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *rin return false; } +/** + * radeon_ring_backup - Back up the content of a ring + * + * @rdev: radeon_device pointer + * @ring: the ring we want to back up + * + * Saves all unprocessed commits from a ring, returns the number of dwords saved.
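The intended consumer of this pair is the GPU reset path: save whatever the CP never fetched, reinitialize the ASIC, then replay. A sketch of the pairing (the reset step in the middle is assumed):

        unsigned ring_size;
        uint32_t *ring_data;

        ring_size = radeon_ring_backup(rdev, ring, &ring_data);
        /* ... hard reset and re-init the ASIC here ... */
        r = radeon_ring_restore(rdev, ring, ring_size, ring_data);
        /* radeon_ring_restore() frees ring_data; a size of 0 is a no-op */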
+ */ +unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, + uint32_t **data) +{ + unsigned size, ptr, i; + + /* just in case lock the ring */ + mutex_lock(&rdev->ring_lock); + *data = NULL; + + if (ring->ring_obj == NULL) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* it doesn't make sense to save anything if all fences are signaled */ + if (!radeon_fence_count_emitted(rdev, ring->idx)) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* calculate the number of dw on the ring */ + if (ring->rptr_save_reg) + ptr = RREG32(ring->rptr_save_reg); + else if (rdev->wb.enabled) + ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); + else { + /* no way to read back the next rptr */ + mutex_unlock(&rdev->ring_lock); + return 0; + } + + size = ring->wptr + (ring->ring_size / 4); + size -= ptr; + size &= ring->ptr_mask; + if (size == 0) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* and then save the content of the ring */ + *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); + if (!*data) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + for (i = 0; i < size; ++i) { + (*data)[i] = ring->ring[ptr++]; + ptr &= ring->ptr_mask; + } + + mutex_unlock(&rdev->ring_lock); + return size; +} + +/** + * radeon_ring_restore - append saved commands to the ring again + * + * @rdev: radeon_device pointer + * @ring: ring to append commands to + * @size: number of dwords we want to write + * @data: saved commands + * + * Allocates space on the ring and restore the previously saved commands. + */ +int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned size, uint32_t *data) +{ + int i, r; + + if (!size || !data) + return 0; + + /* restore the saved ring content */ + r = radeon_ring_lock(rdev, ring, size); + if (r) + return r; + + for (i = 0; i < size; ++i) { + radeon_ring_write(ring, data[i]); + } + + radeon_ring_unlock_commit(rdev, ring); + kfree(data); + return 0; +} + +/** + * radeon_ring_init - init driver ring struct. + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ring_size: size of the ring + * @rptr_offs: offset of the rptr writeback location in the WB buffer + * @rptr_reg: MMIO offset of the rptr register + * @wptr_reg: MMIO offset of the wptr register + * @ptr_reg_shift: bit offset of the rptr/wptr values + * @ptr_reg_mask: bit mask of the rptr/wptr values + * @nop: nop packet for this ring + * + * Initialize the driver information for the selected ring (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop) @@ -391,12 +698,26 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->ring_free_dw = ring->ring_size / 4; + if (rdev->wb.enabled) { + u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4); + ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index; + ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4]; + } if (radeon_debugfs_ring_init(rdev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } + radeon_ring_lockup_update(ring); return 0; } +/** + * radeon_ring_fini - tear down the driver ring struct. 
+ * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Tear down the driver information for the selected ring (all asics). + */ void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring) { int r; @@ -438,6 +759,10 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) count = (ring->ring_size / 4) - ring->ring_free_dw; seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg)); seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg)); + if (ring->rptr_save_reg) { + seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg, + RREG32(ring->rptr_save_reg)); + } seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr); seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr); seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 32059b745728..4e771240fdd0 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -54,7 +54,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, { int i, r; - spin_lock_init(&sa_manager->lock); + init_waitqueue_head(&sa_manager->wq); sa_manager->bo = NULL; sa_manager->size = size; sa_manager->domain = domain; @@ -211,6 +211,39 @@ static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager, return false; } +/** + * radeon_sa_event - Check if we can stop waiting + * + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to allocate + * @align: alignment we need to match + * + * Check if either there is a fence we can wait for or + * enough free memory to satisfy the allocation directly + */ +static bool radeon_sa_event(struct radeon_sa_manager *sa_manager, + unsigned size, unsigned align) +{ + unsigned soffset, eoffset, wasted; + int i; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!list_empty(&sa_manager->flist[i])) { + return true; + } + } + + soffset = radeon_sa_bo_hole_soffset(sa_manager); + eoffset = radeon_sa_bo_hole_eoffset(sa_manager); + wasted = (align - (soffset % align)) % align; + + if ((eoffset - soffset) >= (size + wasted)) { + return true; + } + + return false; +} + static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, struct radeon_fence **fences, unsigned *tries) @@ -297,8 +330,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev, INIT_LIST_HEAD(&(*sa_bo)->olist); INIT_LIST_HEAD(&(*sa_bo)->flist); - spin_lock(&sa_manager->lock); - do { + spin_lock(&sa_manager->wq.lock); + while(1) { for (i = 0; i < RADEON_NUM_RINGS; ++i) { fences[i] = NULL; tries[i] = 0; @@ -309,30 +342,34 @@ int radeon_sa_bo_new(struct radeon_device *rdev, if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo, size, align)) { - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); return 0; } /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries)); - if (block) { - spin_unlock(&sa_manager->lock); - r = radeon_fence_wait_any(rdev, fences, false); - spin_lock(&sa_manager->lock); - if (r) { - /* if we have nothing to wait for we - are practically out of memory */ - if (r == -ENOENT) { - r = -ENOMEM; - } - goto out_err; - } + if (!block) { + break; + } + + spin_unlock(&sa_manager->wq.lock); + r = radeon_fence_wait_any(rdev, fences, false); + spin_lock(&sa_manager->wq.lock); + /* if we have nothing to wait for block */ + if (r == -ENOENT) { + r = wait_event_interruptible_locked( + 
sa_manager->wq, + radeon_sa_event(sa_manager, size, align) + ); + } + if (r) { + goto out_err; } - } while (block); + }; out_err: - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); kfree(*sa_bo); *sa_bo = NULL; return r; @@ -348,15 +385,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, } sa_manager = (*sa_bo)->manager; - spin_lock(&sa_manager->lock); - if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { + spin_lock(&sa_manager->wq.lock); + if (fence && !radeon_fence_signaled(fence)) { (*sa_bo)->fence = radeon_fence_ref(fence); list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[fence->ring]); } else { radeon_sa_bo_remove_locked(*sa_bo); } - spin_unlock(&sa_manager->lock); + wake_up_all_locked(&sa_manager->wq); + spin_unlock(&sa_manager->wq.lock); *sa_bo = NULL; } @@ -366,7 +404,7 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, { struct radeon_sa_bo *i; - spin_lock(&sa_manager->lock); + spin_lock(&sa_manager->wq.lock); list_for_each_entry(i, &sa_manager->olist, olist) { if (&i->olist == sa_manager->hole) { seq_printf(m, ">"); @@ -381,6 +419,6 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, } seq_printf(m, "\n"); } - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); } #endif diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index e2ace5dce117..7cc78de6ddc3 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); } +/* caller must hold ring lock */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring) + int signaler, int waiter) { - int i = 0, r; + int r; - mutex_lock(&rdev->ring_lock); - r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8); - if (r) { - goto error; + /* no need to signal and wait on the same ring */ + if (signaler == waiter) { + return 0; } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - /* no need to sync to our own or unused rings */ - if (!sync_to[i] || i == dst_ring) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - r = -EINVAL; - goto error; - } - - r = radeon_ring_alloc(rdev, &rdev->ring[i], 8); - if (r) { - goto error; - } - - radeon_semaphore_emit_signal(rdev, i, semaphore); - radeon_semaphore_emit_wait(rdev, dst_ring, semaphore); + /* prevent GPU deadlocks */ + if (!rdev->ring[signaler].ready) { + dev_err(rdev->dev, "Trying to sync to a disabled ring!"); + return -EINVAL; + } - radeon_ring_commit(rdev, &rdev->ring[i]); + r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); + if (r) { + return r; } + radeon_semaphore_emit_signal(rdev, signaler, semaphore); + radeon_ring_commit(rdev, &rdev->ring[signaler]); - radeon_ring_commit(rdev, &rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); + /* we assume caller has already allocated space on waiters ring */ + radeon_semaphore_emit_wait(rdev, waiter, semaphore); return 0; - -error: - /* unlock all locks taken so far */ - for (--i; i >= 0; --i) { - if (sync_to[i] || i == dst_ring) { - radeon_ring_undo(&rdev->ring[i]); - } - } - radeon_ring_undo(&rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); - return r; } void 
radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence) { - if (semaphore == NULL) { + if (semaphore == NULL || *semaphore == NULL) { return; } - if (semaphore->waiters > 0) { + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," - " hardware lockup imminent!\n", semaphore); + " hardware lockup imminent!\n", *semaphore); } - radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence); - kfree(semaphore); + radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence); + kfree(*semaphore); + *semaphore = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index efff929ea49d..7c16540c10ff 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -106,13 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(gtt_obj[i]); - r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; @@ -155,13 +149,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(vram_obj); - r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; @@ -241,36 +229,33 @@ void radeon_test_ring_sync(struct radeon_device *rdev, { struct radeon_fence *fence1 = NULL, *fence2 = NULL; struct radeon_semaphore *semaphore = NULL; - int ridxA = radeon_ring_index(rdev, ringA); - int ridxB = radeon_ring_index(rdev, ringB); int r; - r = radeon_fence_create(rdev, &fence1, ridxA); + r = radeon_semaphore_create(rdev, &semaphore); if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); + DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } - r = radeon_fence_create(rdev, &fence2, ridxA); + + r = radeon_ring_lock(rdev, ringA, 64); if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } - - r = radeon_semaphore_create(rdev, &semaphore); + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fence1, ringA->idx); if (r) { - DRM_ERROR("Failed to create semaphore\n"); + DRM_ERROR("Failed to emit fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - - r = radeon_ring_lock(rdev, ringA, 64); + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fence2, ringA->idx); if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ridxA); + DRM_ERROR("Failed to emit fence 2\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence1); - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence2); radeon_ring_unlock_commit(rdev, ringA); mdelay(1000); @@ -285,7 +270,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", 
ringB); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence1, false); @@ -306,7 +291,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence2, false); @@ -316,8 +301,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, } out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL); if (fence1) radeon_fence_unref(&fence1); @@ -336,23 +320,9 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, { struct radeon_fence *fenceA = NULL, *fenceB = NULL; struct radeon_semaphore *semaphore = NULL; - int ridxA = radeon_ring_index(rdev, ringA); - int ridxB = radeon_ring_index(rdev, ringB); - int ridxC = radeon_ring_index(rdev, ringC); bool sigA, sigB; int i, r; - r = radeon_fence_create(rdev, &fenceA, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fenceB, ridxB); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -361,20 +331,30 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringA, 64); if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ridxA); + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fenceA, ringA->idx); + if (r) { + DRM_ERROR("Failed to emit sync fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fenceA); radeon_ring_unlock_commit(rdev, ringA); r = radeon_ring_lock(rdev, ringB, 64); if (r) { - DRM_ERROR("Failed to lock ring B %d\n", ridxB); + DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); + r = radeon_fence_emit(rdev, &fenceB, ringB->idx); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + radeon_ring_unlock_undo(rdev, ringB); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxB, semaphore); - radeon_fence_emit(rdev, fenceB); radeon_ring_unlock_commit(rdev, ringB); mdelay(1000); @@ -393,7 +373,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); for (i = 0; i < 30; ++i) { @@ -419,7 +399,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); mdelay(1000); @@ -436,8 +416,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, } out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL); if (fenceA) 
radeon_fence_unref(&fenceA); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c94a2257761f..5b71c716d83f 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -222,15 +222,11 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, { struct radeon_device *rdev; uint64_t old_start, new_start; - struct radeon_fence *fence, *old_fence; - struct radeon_semaphore *sem = NULL; - int r; + struct radeon_fence *fence; + int r, ridx; rdev = radeon_get_rdev(bo->bdev); - r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev)); - if (unlikely(r)) { - return r; - } + ridx = radeon_copy_ring_index(rdev); old_start = old_mem->start << PAGE_SHIFT; new_start = new_mem->start << PAGE_SHIFT; @@ -243,7 +239,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } switch (new_mem->mem_type) { @@ -255,46 +250,23 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } - if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) { + if (!rdev->ring[ridx].ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); - radeon_fence_unref(&fence); return -EINVAL; } BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0); /* sync other rings */ - old_fence = bo->sync_obj; - if (old_fence && old_fence->ring != fence->ring - && !radeon_fence_signaled(old_fence)) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[old_fence->ring] = true; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - radeon_fence_unref(&fence); - return r; - } - - r = radeon_semaphore_sync_rings(rdev, sem, - sync_to_ring, fence->ring); - if (r) { - radeon_semaphore_free(rdev, sem, NULL); - radeon_fence_unref(&fence); - return r; - } - } - + fence = bo->sync_obj; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ - fence); + &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); - radeon_semaphore_free(rdev, sem, fence); radeon_fence_unref(&fence); return r; } @@ -762,9 +734,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } DRM_INFO("radeon: %uM of GTT memory ready.\n", (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); - if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; - } + rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; r = radeon_ttm_debugfs_init(rdev); if (r) { @@ -825,9 +795,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_vm_ops->fault(vma, vmf); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); return r; } diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index 5e659b034d9a..f93e45d869f4 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -744,14 +744,6 @@ r600 0x9400 0x00028C38 CB_CLRCMP_DST 0x00028C3C CB_CLRCMP_MSK 0x00028C34 CB_CLRCMP_SRC -0x00028100 CB_COLOR0_MASK -0x00028104 CB_COLOR1_MASK -0x00028108 CB_COLOR2_MASK -0x0002810C CB_COLOR3_MASK -0x00028110 CB_COLOR4_MASK -0x00028114 CB_COLOR5_MASK 
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index 5e659b034d9a..f93e45d869f4 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -744,14 +744,6 @@ r600 0x9400
 0x00028C38 CB_CLRCMP_DST
 0x00028C3C CB_CLRCMP_MSK
 0x00028C34 CB_CLRCMP_SRC
-0x00028100 CB_COLOR0_MASK
-0x00028104 CB_COLOR1_MASK
-0x00028108 CB_COLOR2_MASK
-0x0002810C CB_COLOR3_MASK
-0x00028110 CB_COLOR4_MASK
-0x00028114 CB_COLOR5_MASK
-0x00028118 CB_COLOR6_MASK
-0x0002811C CB_COLOR7_MASK
 0x00028808 CB_COLOR_CONTROL
 0x0002842C CB_FOG_BLUE
 0x00028428 CB_FOG_GREEN
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index a464eb5e2df2..2752f7f78237 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -426,13 +426,11 @@ static int rs400_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	return 0;
 }
@@ -470,7 +468,6 @@ int rs400_resume(struct radeon_device *rdev)
 
 int rs400_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -482,7 +479,7 @@ void rs400_fini(struct radeon_device *rdev)
 {
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs400_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -550,20 +547,14 @@ int rs400_init(struct radeon_device *rdev)
 		return r;
 	r300_set_reg_safe(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs400_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs400_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index e95c5e61d4e2..5301b3df8466 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -294,6 +294,7 @@ void rs600_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned enable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -301,26 +302,25 @@ void rs600_hpd_init(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
			       S_007D00_DC_HOT_PLUG_DETECT1_EN(1));
-			rdev->irq.hpd[0] = true;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
			       S_007D10_DC_HOT_PLUG_DETECT2_EN(1));
-			rdev->irq.hpd[1] = true;
 			break;
 		default:
 			break;
 		}
+		enable |= 1 << radeon_connector->hpd.hpd;
 		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 	}
-	if (rdev->irq.installed)
-		rs600_irq_set(rdev);
+	radeon_irq_kms_enable_hpd(rdev, enable);
 }
 
 void rs600_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned disable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -328,17 +328,17 @@ void rs600_hpd_fini(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
			       S_007D00_DC_HOT_PLUG_DETECT1_EN(0));
-			rdev->irq.hpd[0] = false;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
			       S_007D10_DC_HOT_PLUG_DETECT2_EN(0));
-			rdev->irq.hpd[1] = false;
 			break;
 		default:
 			break;
 		}
+		disable |= 1 << radeon_connector->hpd.hpd;
 	}
+	radeon_irq_kms_disable_hpd(rdev, disable);
 }
 
 int rs600_asic_reset(struct radeon_device *rdev)
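rs600_hpd_init() now only accumulates a connector bitmask and hands it to radeon_irq_kms_enable_hpd(), instead of poking rdev->irq.hpd[] and calling the per-ASIC irq_set itself. A plausible sketch of that helper, assuming it flips the hpd flags and reprograms the hardware under the irq lock; the real implementation lives in radeon_irq_kms.c and may differ in detail:

	void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
	{
		unsigned long irqflags;
		int i;

		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		for (i = 0; i < RADEON_MAX_HPD_PINS; i++)
			rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i));
		radeon_irq_set(rdev);	/* push the new mask to the hardware */
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}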
@@ -564,18 +564,18 @@ int rs600_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		tmp |= S_000040_SW_INT_EN(1);
 	}
 	if (rdev->irq.gui_idle) {
 		tmp |= S_000040_GUI_IDLE(1);
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    rdev->irq.pflip[0]) {
+	    atomic_read(&rdev->irq.pflip[0])) {
 		mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    rdev->irq.pflip[1]) {
+	    atomic_read(&rdev->irq.pflip[1])) {
 		mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.hpd[0]) {
@@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev)
 	/* GUI idle */
 	if (G_000040_GUI_IDLE(status)) {
 		rdev->irq.gui_idle_acked = true;
-		rdev->pm.gui_idle = true;
 		wake_up(&rdev->irq.idle_queue);
 	}
 	/* Vertical blank interrupts */
@@ -696,7 +695,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[0])
+		if (atomic_read(&rdev->irq.pflip[0]))
 			radeon_crtc_handle_flip(rdev, 0);
 	}
 	if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
@@ -705,7 +704,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[1])
+		if (atomic_read(&rdev->irq.pflip[1]))
 			radeon_crtc_handle_flip(rdev, 1);
 	}
 	if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
@@ -908,13 +907,11 @@ static int rs600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
@@ -956,7 +953,6 @@ int rs600_resume(struct radeon_device *rdev)
 
 int rs600_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -970,7 +966,7 @@ void rs600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs600_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -1038,20 +1034,14 @@ int rs600_init(struct radeon_device *rdev)
 		return r;
 	rs600_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs600_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs600_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 159b6a43fda0..3b663fcfe061 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -637,13 +637,11 @@ static int rs690_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
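The sw_int flags are gone in favour of atomic reference counts in ring_int[], so fence waiters can enable CP interrupts without taking a lock in the common case. A sketch of the expected get side, assuming the counter gates a single radeon_irq_set() call on the 0 to 1 transition (the put side mirrors it with atomic_dec_and_test()):

	void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
	{
		unsigned long irqflags;

		if (!rdev->ddev->irq_enabled)
			return;

		/* only the first user reprograms the hardware */
		if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) {
			spin_lock_irqsave(&rdev->irq.lock, irqflags);
			radeon_irq_set(rdev);
			spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
		}
	}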
@@ -685,7 +683,6 @@ int rs690_resume(struct radeon_device *rdev)
 
 int rs690_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -699,7 +696,7 @@ void rs690_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs400_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -768,20 +765,14 @@ int rs690_init(struct radeon_device *rdev)
 		return r;
 	rs600_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs690_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs400_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 7f08cedb5333..aa8ef491ef3c 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -281,12 +281,8 @@ int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
 
 void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
 {
-	save->d1vga_control = RREG32(R_000330_D1VGA_CONTROL);
-	save->d2vga_control = RREG32(R_000338_D2VGA_CONTROL);
 	save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
 	save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
-	save->d1crtc_control = RREG32(R_006080_D1CRTC_CONTROL);
-	save->d2crtc_control = RREG32(R_006880_D2CRTC_CONTROL);
 
 	/* Stop all video */
 	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
@@ -311,15 +307,6 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
 	/* Unlock host access */
 	WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
 	mdelay(1);
-	/* Restore video state */
-	WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control);
-	WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
-	WREG32(R_006080_D1CRTC_CONTROL, save->d1crtc_control);
-	WREG32(R_006880_D2CRTC_CONTROL, save->d2crtc_control);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
 	WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
 }
 
@@ -408,13 +395,11 @@ static int rv515_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	return 0;
 }
@@ -469,7 +454,7 @@ void rv515_fini(struct radeon_device *rdev)
 {
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rv370_pcie_gart_fini(rdev);
 	radeon_agp_fini(rdev);
@@ -543,20 +528,14 @@ int rv515_init(struct radeon_device *rdev)
 		return r;
 	rv515_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rv515_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rv370_pcie_gart_fini(rdev);
 		radeon_agp_fini(rdev);
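Across all of the *_startup() paths in this series, radeon_ib_pool_init() replaces the old split between init in *_init() and start in *_startup(), which means it is now called on every resume as well and must be idempotent. A sketch of the guard this implies, assuming a ready flag on the device (the sa-manager call and sizes are illustrative):

	int radeon_ib_pool_init(struct radeon_device *rdev)
	{
		int r;

		if (rdev->ib_pool_ready)	/* already set up on a prior startup */
			return 0;

		r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
					      RADEON_IB_POOL_SIZE * 64 * 1024,
					      RADEON_GEM_DOMAIN_GTT);
		if (r)
			return r;

		rdev->ib_pool_ready = true;
		return 0;
	}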
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b4f51c569c36..ca8ffec10ff6 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -358,8 +358,10 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev)
 
 void r700_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r700_cp_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 /*
@@ -951,13 +953,11 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
@@ -994,9 +994,6 @@ int rv770_resume(struct radeon_device *rdev)
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
-	radeon_ib_pool_suspend(rdev);
-	r600_blit_suspend(rdev);
-	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	r600_irq_suspend(rdev);
@@ -1076,20 +1073,14 @@ int rv770_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rv770_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rv770_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
@@ -1104,7 +1095,7 @@ void rv770_fini(struct radeon_device *rdev)
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
@@ -1121,6 +1112,8 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, lanes, speed_cntl, tmp;
 	u16 link_cntl2;
+	u32 mask;
+	int ret;
 
 	if (radeon_pcie_gen2 == 0)
 		return;
@@ -1135,6 +1128,15 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 	if (ASIC_IS_X2(rdev))
 		return;
 
+	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+	if (ret != 0)
+		return;
+
+	if (!(mask & DRM_PCIE_SPEED_50))
+		return;
+
+	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+
 	/* advertise upconfig capability */
 	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
 	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
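r700_cp_fini() now releases ring->rptr_save_reg, which implies the common ring setup path allocates it. A sketch of the assumed allocation side; where exactly it lives (ring init vs. CP resume) is not visible in these hunks:

	/* assumed counterpart during ring setup: grab a scratch register
	 * the CP can write the saved read pointer into */
	r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
	if (r) {
		DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
		return r;
	}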
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 0b0279291a73..0139e227e3c7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1639,11 +1639,19 @@ static void si_gpu_init(struct radeon_device *rdev)
 		/* XXX what about 12? */
 		rdev->config.si.tile_config |= (3 << 0);
 		break;
-	}
-	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
-		rdev->config.si.tile_config |= 1 << 4;
-	else
+	}
+	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
+	case 0: /* four banks */
 		rdev->config.si.tile_config |= 0 << 4;
+		break;
+	case 1: /* eight banks */
+		rdev->config.si.tile_config |= 1 << 4;
+		break;
+	case 2: /* sixteen banks */
+	default:
+		rdev->config.si.tile_config |= 2 << 4;
+		break;
+	}
 	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
 	rdev->config.si.tile_config |=
@@ -1762,13 +1770,34 @@ void si_fence_ring_emit(struct radeon_device *rdev,
  */
 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	u32 header;
 
-	if (ib->is_const_ib)
+	if (ib->is_const_ib) {
+		/* set switch buffer packet before const IB */
+		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		radeon_ring_write(ring, 0);
+
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
-	else
+	} else {
+		u32 next_rptr;
+		if (ring->rptr_save_reg) {
+			next_rptr = ring->wptr + 3 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
+			radeon_ring_write(ring, next_rptr);
+		} else if (rdev->wb.enabled) {
+			next_rptr = ring->wptr + 5 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+			radeon_ring_write(ring, (1 << 8));
+			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+			radeon_ring_write(ring, next_rptr);
+		}
+
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+	}
 
 	radeon_ring_write(ring, header);
 	radeon_ring_write(ring,
@@ -1779,18 +1808,20 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
			  upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
 
-	/* flush read cache over gart for this vmid */
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(ring, ib->vm_id);
-	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
-			  PACKET3_TC_ACTION_ENA |
-			  PACKET3_SH_KCACHE_ACTION_ENA |
-			  PACKET3_SH_ICACHE_ACTION_ENA);
-	radeon_ring_write(ring, 0xFFFFFFFF);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 10); /* poll interval */
+	if (!ib->is_const_ib) {
+		/* flush read cache over gart for this vmid */
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, ib->vm_id);
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+				  PACKET3_TC_ACTION_ENA |
+				  PACKET3_SH_KCACHE_ACTION_ENA |
+				  PACKET3_SH_ICACHE_ACTION_ENA);
+		radeon_ring_write(ring, 0xFFFFFFFF);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 10); /* poll interval */
+	}
 }
 
 /*
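The magic offsets in the next_rptr computation above are packet lengths in dwords. A worked breakdown, under the assumption that nothing else is emitted between these packets:

	/* next_rptr must point past everything this function emits:
	 *   SET_CONFIG_REG path: 3 dwords (header + reg offset + value)
	 *   WRITE_DATA path:     5 dwords (header + dst sel + addr lo/hi + value)
	 *   INDIRECT_BUFFER:     4 dwords (header + addr lo/hi + length)
	 *   GART cache flush:    8 dwords (SET_CONFIG_REG: 3 + SURFACE_SYNC: 5)
	 * hence wptr + 3 + 4 + 8 and wptr + 5 + 4 + 8 respectively.
	 */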
@@ -1917,10 +1948,20 @@ static int si_cp_start(struct radeon_device *rdev)
 
 static void si_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring;
 	si_cp_enable(rdev, false);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 static int si_cp_resume(struct radeon_device *rdev)
@@ -2702,7 +2743,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 	if (ib->is_const_ib)
 		ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
 	else {
-		switch (ib->fence->ring) {
+		switch (ib->ring) {
 		case RADEON_RING_TYPE_GFX_INDEX:
 			ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
 			break;
@@ -2711,7 +2752,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 			ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
 			break;
 		default:
-			dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
+			dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
 			ret = -EINVAL;
 			break;
 		}
@@ -2942,7 +2983,6 @@ static void si_disable_interrupts(struct radeon_device *rdev)
 	WREG32(IH_RB_RPTR, 0);
 	WREG32(IH_RB_WPTR, 0);
 	rdev->ih.enabled = false;
-	rdev->ih.wptr = 0;
 	rdev->ih.rptr = 0;
 }
 
@@ -3093,45 +3133,45 @@ int si_irq_set(struct radeon_device *rdev)
 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
 
 	/* enable CP interrupts on all rings */
-	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int gfx\n");
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
-	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int cp1\n");
 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
 	}
-	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int cp2\n");
 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    rdev->irq.pflip[0]) {
+	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("si_irq_set: vblank 0\n");
 		crtc1 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    rdev->irq.pflip[1]) {
+	    atomic_read(&rdev->irq.pflip[1])) {
 		DRM_DEBUG("si_irq_set: vblank 1\n");
 		crtc2 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[2] ||
-	    rdev->irq.pflip[2]) {
+	    atomic_read(&rdev->irq.pflip[2])) {
 		DRM_DEBUG("si_irq_set: vblank 2\n");
 		crtc3 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[3] ||
-	    rdev->irq.pflip[3]) {
+	    atomic_read(&rdev->irq.pflip[3])) {
 		DRM_DEBUG("si_irq_set: vblank 3\n");
 		crtc4 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[4] ||
-	    rdev->irq.pflip[4]) {
+	    atomic_read(&rdev->irq.pflip[4])) {
 		DRM_DEBUG("si_irq_set: vblank 4\n");
 		crtc5 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[5] ||
-	    rdev->irq.pflip[5]) {
+	    atomic_read(&rdev->irq.pflip[5])) {
 		DRM_DEBUG("si_irq_set: vblank 5\n");
 		crtc6 |= VBLANK_INT_MASK;
 	}
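The pflip[] flags have likewise become atomic counters, read directly in the interrupt paths above without holding a lock. A sketch of the matching put side used when a flip retires, following the same convention as the ring counters (details assumed, complementing the get side shown earlier):

	void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc)
	{
		unsigned long irqflags;

		if (crtc < 0 || crtc >= rdev->num_crtc)
			return;

		/* last flip retired: let radeon_irq_set() drop the vblank irq */
		if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) {
			spin_lock_irqsave(&rdev->irq.lock, irqflags);
			radeon_irq_set(rdev);
			spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
		}
	}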
@@ -3359,29 +3399,27 @@ int si_irq_process(struct radeon_device *rdev)
 	u32 rptr;
 	u32 src_id, src_data, ring_id;
 	u32 ring_index;
-	unsigned long flags;
 	bool queue_hotplug = false;
 
 	if (!rdev->ih.enabled || rdev->shutdown)
 		return IRQ_NONE;
 
 	wptr = si_get_ih_wptr(rdev);
+
+restart_ih:
+	/* is somebody else already processing irqs? */
+	if (atomic_xchg(&rdev->ih.lock, 1))
+		return IRQ_NONE;
+
 	rptr = rdev->ih.rptr;
 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
 
-	spin_lock_irqsave(&rdev->ih.lock, flags);
-	if (rptr == wptr) {
-		spin_unlock_irqrestore(&rdev->ih.lock, flags);
-		return IRQ_NONE;
-	}
-restart_ih:
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
 	/* display interrupts */
 	si_irq_ack(rdev);
 
-	rdev->ih.wptr = wptr;
 	while (rptr != wptr) {
 		/* wptr/rptr are in bytes! */
 		ring_index = rptr / 4;
@@ -3399,7 +3437,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[0])
+				if (atomic_read(&rdev->irq.pflip[0]))
 					radeon_crtc_handle_flip(rdev, 0);
 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D1 vblank\n");
@@ -3425,7 +3463,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[1])
+				if (atomic_read(&rdev->irq.pflip[1]))
 					radeon_crtc_handle_flip(rdev, 1);
 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D2 vblank\n");
@@ -3451,7 +3489,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[2])
+				if (atomic_read(&rdev->irq.pflip[2]))
 					radeon_crtc_handle_flip(rdev, 2);
 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D3 vblank\n");
@@ -3477,7 +3515,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[3])
+				if (atomic_read(&rdev->irq.pflip[3]))
 					radeon_crtc_handle_flip(rdev, 3);
 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D4 vblank\n");
@@ -3503,7 +3541,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[4])
+				if (atomic_read(&rdev->irq.pflip[4]))
 					radeon_crtc_handle_flip(rdev, 4);
 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D5 vblank\n");
@@ -3529,7 +3567,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[5])
+				if (atomic_read(&rdev->irq.pflip[5]))
 					radeon_crtc_handle_flip(rdev, 5);
 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D6 vblank\n");
@@ -3620,7 +3658,6 @@ restart_ih:
 			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
-			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 			break;
 		default:
@@ -3632,15 +3669,17 @@ restart_ih:
 		rptr += 16;
 		rptr &= rdev->ih.ptr_mask;
 	}
-	/* make sure wptr hasn't changed while processing */
-	wptr = si_get_ih_wptr(rdev);
-	if (wptr != rdev->ih.wptr)
-		goto restart_ih;
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
 	rdev->ih.rptr = rptr;
 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
-	spin_unlock_irqrestore(&rdev->ih.lock, flags);
+	atomic_set(&rdev->ih.lock, 0);
+
+	/* make sure wptr hasn't changed while processing */
+	wptr = si_get_ih_wptr(rdev);
+	if (wptr != rptr)
+		goto restart_ih;
+
 	return IRQ_HANDLED;
 }
 
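The reworked si_irq_process() replaces the spinlock with an ownership flag: whoever wins the atomic_xchg() drains the ring, and the final re-read of wptr closes the race where new entries land after the flag is dropped. The pattern in isolation, as a generic sketch not tied to radeon internals (types and accessors hypothetical):

	/* generic single-consumer drain; 'lock' is an atomic_t ownership flag */
	static irqreturn_t drain_ring(struct ih_ring *ih)
	{
		u32 wptr = read_hw_wptr(ih);	/* hypothetical accessor */
		u32 rptr;

	restart:
		if (atomic_xchg(&ih->lock, 1))
			return IRQ_NONE;	/* another CPU owns the ring */
		rptr = ih->rptr;
		while (rptr != wptr)
			rptr = handle_one_entry(ih, rptr);	/* hypothetical */
		ih->rptr = rptr;
		atomic_set(&ih->lock, 0);

		/* entries queued after we released ownership? take it back */
		wptr = read_hw_wptr(ih);
		if (wptr != rptr)
			goto restart;
		return IRQ_HANDLED;
	}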
DRM_ERROR("radeon: failed testing IB (%d) on CP ring 1\n", r); - rdev->accel_working = false; + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; } - r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + r = radeon_vm_manager_init(rdev); if (r) { - DRM_ERROR("radeon: failed testing IB (%d) on CP ring 2\n", r); - rdev->accel_working = false; + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); return r; } - r = radeon_vm_manager_start(rdev); - if (r) - return r; - return 0; } @@ -3809,12 +3831,6 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - radeon_vm_manager_suspend(rdev); -#if 0 - r600_blit_suspend(rdev); -#endif si_cp_enable(rdev, false); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; @@ -3903,17 +3919,7 @@ int si_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = radeon_vm_manager_init(rdev); - if (r) { - dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); - } - r = si_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); @@ -3921,7 +3927,7 @@ int si_init(struct radeon_device *rdev) si_irq_fini(rdev); si_rlc_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); radeon_irq_kms_fini(rdev); si_pcie_gart_fini(rdev); @@ -3950,7 +3956,7 @@ void si_fini(struct radeon_device *rdev) si_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -3962,3 +3968,22 @@ void si_fini(struct radeon_device *rdev) rdev->bios = NULL; } +/** + * si_get_gpu_clock - return GPU clock counter snapshot + * + * @rdev: radeon_device pointer + * + * Fetches a GPU clock counter snapshot (SI). + * Returns the 64 bit clock counter snapshot. + */ +uint64_t si_get_gpu_clock(struct radeon_device *rdev) +{ + uint64_t clock; + + mutex_lock(&rdev->gpu_clock_mutex); + WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&rdev->gpu_clock_mutex); + return clock; +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index db4067962868..ef4815c27b1c 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -698,6 +698,9 @@ #define RLC_UCODE_ADDR 0xC32C #define RLC_UCODE_DATA 0xC330 +#define RLC_GPU_CLOCK_COUNT_LSB 0xC338 +#define RLC_GPU_CLOCK_COUNT_MSB 0xC33C +#define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC340 #define RLC_MC_CNTL 0xC344 #define RLC_UCODE_CNTL 0xC348 @@ -901,5 +904,6 @@ #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SET_CE_DE_COUNTERS 0x89 #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A +#define PACKET3_SWITCH_BUFFER 0x8B #endif |