diff options
34 files changed, 1551 insertions, 464 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9a1fdcf400c2..1f2301d26c0a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -234,10 +234,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence) if (likely(fence)) { struct nouveau_fence *prev_fence; - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); prev_fence = nvbo->bo.sync_obj; nvbo->bo.sync_obj = nouveau_fence_ref(fence); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); nouveau_fence_unref((void *)&prev_fence); } @@ -557,9 +557,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev, data |= r->vor; } - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); if (ret) { NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret); break; @@ -791,9 +791,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, } if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) { - spin_lock(&nvbo->bo.lock); + spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait); - spin_unlock(&nvbo->bo.lock); + spin_unlock(&nvbo->bo.bdev->fence_lock); } else { ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait); if (ret == 0) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 075d6c172595..522d29b37007 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -40,6 +40,61 @@ static void evergreen_gpu_init(struct radeon_device *rdev); void evergreen_fini(struct radeon_device *rdev); +void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; + u32 tmp; + + /* make sure flip is at vb rather than hb */ + tmp = RREG32(EVERGREEN_GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset); + tmp &= ~EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN; + WREG32(EVERGREEN_GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); + + /* set pageflip to happen anywhere in vblank interval */ + WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + + /* enable the pflip int */ + radeon_irq_kms_pflip_irq_get(rdev, crtc); +} + +void evergreen_post_page_flip(struct radeon_device *rdev, int crtc) +{ + /* disable the pflip int */ + radeon_irq_kms_pflip_irq_put(rdev, crtc); +} + +u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + u32 tmp = RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset); + + /* Lock the graphics update lock */ + tmp |= EVERGREEN_GRPH_UPDATE_LOCK; + WREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* update the scanout addresses */ + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(crtc_base)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(crtc_base)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + + /* Wait for update_pending to go high. */ + while (!(RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset) & EVERGREEN_GRPH_SURFACE_UPDATE_PENDING)); + DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); + + /* Unlock the lock, so double-buffering can take place inside vblank */ + tmp &= ~EVERGREEN_GRPH_UPDATE_LOCK; + WREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* Return current update_pending status: */ + return RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset) & EVERGREEN_GRPH_SURFACE_UPDATE_PENDING; +} + /* get temperature in millidegrees */ u32 evergreen_get_temp(struct radeon_device *rdev) { @@ -2171,6 +2226,7 @@ int evergreen_irq_set(struct radeon_device *rdev) u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; + u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -2196,27 +2252,33 @@ int evergreen_irq_set(struct radeon_device *rdev) cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.crtc_vblank_int[0]) { + if (rdev->irq.crtc_vblank_int[0] || + rdev->irq.pflip[0]) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[1]) { + if (rdev->irq.crtc_vblank_int[1] || + rdev->irq.pflip[1]) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[2]) { + if (rdev->irq.crtc_vblank_int[2] || + rdev->irq.pflip[2]) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[3]) { + if (rdev->irq.crtc_vblank_int[3] || + rdev->irq.pflip[3]) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[4]) { + if (rdev->irq.crtc_vblank_int[4] || + rdev->irq.pflip[4]) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[5]) { + if (rdev->irq.crtc_vblank_int[5] || + rdev->irq.pflip[5]) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2261,6 +2323,13 @@ int evergreen_irq_set(struct radeon_device *rdev) WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); } + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6); + WREG32(DC_HPD1_INT_CONTROL, hpd1); WREG32(DC_HPD2_INT_CONTROL, hpd2); WREG32(DC_HPD3_INT_CONTROL, hpd3); @@ -2271,79 +2340,92 @@ int evergreen_irq_set(struct radeon_device *rdev) return 0; } -static inline void evergreen_irq_ack(struct radeon_device *rdev, - u32 *disp_int, - u32 *disp_int_cont, - u32 *disp_int_cont2, - u32 *disp_int_cont3, - u32 *disp_int_cont4, - u32 *disp_int_cont5) +static inline void evergreen_irq_ack(struct radeon_device *rdev) { u32 tmp; - *disp_int = RREG32(DISP_INTERRUPT_STATUS); - *disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); - *disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); - *disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); - *disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); - *disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); - - if (*disp_int & LB_D1_VBLANK_INTERRUPT) + rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS); + rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); + rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2); + rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3); + rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4); + rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); + rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET); + rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET); + + if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR); + + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int & LB_D1_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int_cont & LB_D2_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int_cont & LB_D2_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int_cont2 & LB_D3_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int_cont3 & LB_D4_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int_cont4 & LB_D5_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK); - if (*disp_int_cont5 & LB_D6_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK); - if (*disp_int & DC_HPD1_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { tmp = RREG32(DC_HPD1_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD1_INT_CONTROL, tmp); } - if (*disp_int_cont & DC_HPD2_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { tmp = RREG32(DC_HPD2_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD2_INT_CONTROL, tmp); } - if (*disp_int_cont2 & DC_HPD3_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { tmp = RREG32(DC_HPD3_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD3_INT_CONTROL, tmp); } - if (*disp_int_cont3 & DC_HPD4_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { tmp = RREG32(DC_HPD4_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD4_INT_CONTROL, tmp); } - if (*disp_int_cont4 & DC_HPD5_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { tmp = RREG32(DC_HPD5_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD5_INT_CONTROL, tmp); } - if (*disp_int_cont5 & DC_HPD6_INTERRUPT) { + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { tmp = RREG32(DC_HPD5_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); @@ -2352,14 +2434,10 @@ static inline void evergreen_irq_ack(struct radeon_device *rdev, void evergreen_irq_disable(struct radeon_device *rdev) { - u32 disp_int, disp_int_cont, disp_int_cont2; - u32 disp_int_cont3, disp_int_cont4, disp_int_cont5; - r600_disable_interrupts(rdev); /* Wait and acknowledge irq */ mdelay(1); - evergreen_irq_ack(rdev, &disp_int, &disp_int_cont, &disp_int_cont2, - &disp_int_cont3, &disp_int_cont4, &disp_int_cont5); + evergreen_irq_ack(rdev); evergreen_disable_interrupt_state(rdev); } @@ -2399,8 +2477,6 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr = rdev->ih.rptr; u32 src_id, src_data; u32 ring_index; - u32 disp_int, disp_int_cont, disp_int_cont2; - u32 disp_int_cont3, disp_int_cont4, disp_int_cont5; unsigned long flags; bool queue_hotplug = false; @@ -2421,8 +2497,7 @@ int evergreen_irq_process(struct radeon_device *rdev) restart_ih: /* display interrupts */ - evergreen_irq_ack(rdev, &disp_int, &disp_int_cont, &disp_int_cont2, - &disp_int_cont3, &disp_int_cont4, &disp_int_cont5); + evergreen_irq_ack(rdev); rdev->ih.wptr = wptr; while (rptr != wptr) { @@ -2435,17 +2510,21 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (disp_int & LB_D1_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int &= ~LB_D1_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[0]) + radeon_crtc_handle_flip(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); } break; case 1: /* D1 vline */ - if (disp_int & LB_D1_VLINE_INTERRUPT) { - disp_int &= ~LB_D1_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; DRM_DEBUG("IH: D1 vline\n"); } break; @@ -2457,17 +2536,21 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[1]) + radeon_crtc_handle_flip(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); } break; case 1: /* D2 vline */ - if (disp_int_cont & LB_D2_VLINE_INTERRUPT) { - disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; DRM_DEBUG("IH: D2 vline\n"); } break; @@ -2479,17 +2562,21 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[2]) + radeon_crtc_handle_flip(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); } break; case 1: /* D3 vline */ - if (disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; DRM_DEBUG("IH: D3 vline\n"); } break; @@ -2501,17 +2588,21 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[3]) + radeon_crtc_handle_flip(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); } break; case 1: /* D4 vline */ - if (disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; DRM_DEBUG("IH: D4 vline\n"); } break; @@ -2523,17 +2614,21 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[4]) + radeon_crtc_handle_flip(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); } break; case 1: /* D5 vline */ - if (disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; DRM_DEBUG("IH: D5 vline\n"); } break; @@ -2545,17 +2640,21 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[5]) + radeon_crtc_handle_flip(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); } break; case 1: /* D6 vline */ - if (disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; DRM_DEBUG("IH: D6 vline\n"); } break; @@ -2567,43 +2666,43 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (disp_int & DC_HPD1_INTERRUPT) { - disp_int &= ~DC_HPD1_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD1\n"); } break; case 1: - if (disp_int_cont & DC_HPD2_INTERRUPT) { - disp_int_cont &= ~DC_HPD2_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD2\n"); } break; case 2: - if (disp_int_cont2 & DC_HPD3_INTERRUPT) { - disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD3\n"); } break; case 3: - if (disp_int_cont3 & DC_HPD4_INTERRUPT) { - disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD4\n"); } break; case 4: - if (disp_int_cont4 & DC_HPD5_INTERRUPT) { - disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD5\n"); } break; case 5: - if (disp_int_cont5 & DC_HPD6_INTERRUPT) { - disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD6\n"); } diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index 2330f3a36fd5..c781c92c3451 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -105,6 +105,11 @@ #define EVERGREEN_GRPH_Y_START 0x6830 #define EVERGREEN_GRPH_X_END 0x6834 #define EVERGREEN_GRPH_Y_END 0x6838 +#define EVERGREEN_GRPH_UPDATE 0x6844 +# define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) +# define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) +#define EVERGREEN_GRPH_FLIP_CONTROL 0x6848 +# define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) /* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ #define EVERGREEN_CUR_CONTROL 0x6998 @@ -178,6 +183,7 @@ # define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) #define EVERGREEN_CRTC_STATUS 0x6e8c #define EVERGREEN_CRTC_STATUS_POSITION 0x6e90 +#define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 #define EVERGREEN_CRTC_UPDATE_LOCK 0x6ed4 #define EVERGREEN_DC_GPIO_HPD_MASK 0x64b0 diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 8e10aa9f74b0..300b4a64d8fe 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -68,6 +68,56 @@ MODULE_FIRMWARE(FIRMWARE_R520); * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ +void r100_pre_page_flip(struct radeon_device *rdev, int crtc) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; + u32 tmp; + + /* make sure flip is at vb rather than hb */ + tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset); + tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL; + /* make sure pending bit is asserted */ + tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN; + WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp); + + /* set pageflip to happen as late as possible in the vblank interval. + * same field for crtc1/2 + */ + tmp = RREG32(RADEON_CRTC_GEN_CNTL); + tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK; + WREG32(RADEON_CRTC_GEN_CNTL, tmp); + + /* enable the pflip int */ + radeon_irq_kms_pflip_irq_get(rdev, crtc); +} + +void r100_post_page_flip(struct radeon_device *rdev, int crtc) +{ + /* disable the pflip int */ + radeon_irq_kms_pflip_irq_put(rdev, crtc); +} + +u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK; + + /* Lock the graphics update lock */ + /* update the scanout addresses */ + WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp); + + /* Wait for update_pending to go high. */ + while (!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)); + DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); + + /* Unlock the lock, so double-buffering can take place inside vblank */ + tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK; + WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp); + + /* Return current update_pending status: */ + return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET; +} + void r100_pm_get_dynpm_state(struct radeon_device *rdev) { int i; @@ -526,10 +576,12 @@ int r100_irq_set(struct radeon_device *rdev) if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } - if (rdev->irq.crtc_vblank_int[0]) { + if (rdev->irq.crtc_vblank_int[0] || + rdev->irq.pflip[0]) { tmp |= RADEON_CRTC_VBLANK_MASK; } - if (rdev->irq.crtc_vblank_int[1]) { + if (rdev->irq.crtc_vblank_int[1] || + rdev->irq.pflip[1]) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -600,14 +652,22 @@ int r100_irq_process(struct radeon_device *rdev) } /* Vertical blank interrupts */ if (status & RADEON_CRTC_VBLANK_STAT) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[0]) + radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[1]) + radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { queue_hotplug = true; diff --git a/drivers/gpu/drm/radeon/r500_reg.h b/drivers/gpu/drm/radeon/r500_reg.h index 6ac1f604e29b..fc437059918f 100644 --- a/drivers/gpu/drm/radeon/r500_reg.h +++ b/drivers/gpu/drm/radeon/r500_reg.h @@ -355,6 +355,8 @@ #define AVIVO_D1CRTC_FRAME_COUNT 0x60a4 #define AVIVO_D1CRTC_STEREO_CONTROL 0x60c4 +#define AVIVO_D1MODE_MASTER_UPDATE_MODE 0x60e4 + /* master controls */ #define AVIVO_DC_CRTC_MASTER_EN 0x60f8 #define AVIVO_DC_CRTC_TV_CONTROL 0x60fc @@ -409,8 +411,10 @@ #define AVIVO_D1GRPH_X_END 0x6134 #define AVIVO_D1GRPH_Y_END 0x6138 #define AVIVO_D1GRPH_UPDATE 0x6144 +# define AVIVO_D1GRPH_SURFACE_UPDATE_PENDING (1 << 2) # define AVIVO_D1GRPH_UPDATE_LOCK (1 << 16) #define AVIVO_D1GRPH_FLIP_CONTROL 0x6148 +# define AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) #define AVIVO_D1CUR_CONTROL 0x6400 # define AVIVO_D1CURSOR_EN (1 << 0) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index b37361b69ad5..53bfe3afb0fa 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2870,6 +2870,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev) WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(GRBM_INT_CNTL, 0); WREG32(DxMODE_INT_MASK, 0); + WREG32(D1GRPH_INTERRUPT_CONTROL, 0); + WREG32(D2GRPH_INTERRUPT_CONTROL, 0); if (ASIC_IS_DCE3(rdev)) { WREG32(DCE3_DACA_AUTODETECT_INT_CONTROL, 0); WREG32(DCE3_DACB_AUTODETECT_INT_CONTROL, 0); @@ -2994,6 +2996,7 @@ int r600_irq_set(struct radeon_device *rdev) u32 hpd1, hpd2, hpd3, hpd4 = 0, hpd5 = 0, hpd6 = 0; u32 grbm_int_cntl = 0; u32 hdmi1, hdmi2; + u32 d1grph = 0, d2grph = 0; if (!rdev->irq.installed) { WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); @@ -3030,11 +3033,13 @@ int r600_irq_set(struct radeon_device *rdev) cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.crtc_vblank_int[0]) { + if (rdev->irq.crtc_vblank_int[0] || + rdev->irq.pflip[0]) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } - if (rdev->irq.crtc_vblank_int[1]) { + if (rdev->irq.crtc_vblank_int[1] || + rdev->irq.pflip[1]) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3077,6 +3082,8 @@ int r600_irq_set(struct radeon_device *rdev) WREG32(CP_INT_CNTL, cp_int_cntl); WREG32(DxMODE_INT_MASK, mode_int); + WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); + WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(R600_HDMI_BLOCK1 + R600_HDMI_CNTL, hdmi1); if (ASIC_IS_DCE3(rdev)) { @@ -3099,32 +3106,35 @@ int r600_irq_set(struct radeon_device *rdev) return 0; } -static inline void r600_irq_ack(struct radeon_device *rdev, - u32 *disp_int, - u32 *disp_int_cont, - u32 *disp_int_cont2) +static inline void r600_irq_ack(struct radeon_device *rdev) { u32 tmp; if (ASIC_IS_DCE3(rdev)) { - *disp_int = RREG32(DCE3_DISP_INTERRUPT_STATUS); - *disp_int_cont = RREG32(DCE3_DISP_INTERRUPT_STATUS_CONTINUE); - *disp_int_cont2 = RREG32(DCE3_DISP_INTERRUPT_STATUS_CONTINUE2); + rdev->irq.stat_regs.r600.disp_int = RREG32(DCE3_DISP_INTERRUPT_STATUS); + rdev->irq.stat_regs.r600.disp_int_cont = RREG32(DCE3_DISP_INTERRUPT_STATUS_CONTINUE); + rdev->irq.stat_regs.r600.disp_int_cont2 = RREG32(DCE3_DISP_INTERRUPT_STATUS_CONTINUE2); } else { - *disp_int = RREG32(DISP_INTERRUPT_STATUS); - *disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); - *disp_int_cont2 = 0; - } - - if (*disp_int & LB_D1_VBLANK_INTERRUPT) + rdev->irq.stat_regs.r600.disp_int = RREG32(DISP_INTERRUPT_STATUS); + rdev->irq.stat_regs.r600.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE); + rdev->irq.stat_regs.r600.disp_int_cont2 = 0; + } + rdev->irq.stat_regs.r600.d1grph_int = RREG32(D1GRPH_INTERRUPT_STATUS); + rdev->irq.stat_regs.r600.d2grph_int = RREG32(D2GRPH_INTERRUPT_STATUS); + + if (rdev->irq.stat_regs.r600.d1grph_int & DxGRPH_PFLIP_INT_OCCURRED) + WREG32(D1GRPH_INTERRUPT_STATUS, DxGRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.r600.d2grph_int & DxGRPH_PFLIP_INT_OCCURRED) + WREG32(D2GRPH_INTERRUPT_STATUS, DxGRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) WREG32(D1MODE_VBLANK_STATUS, DxMODE_VBLANK_ACK); - if (*disp_int & LB_D1_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) WREG32(D1MODE_VLINE_STATUS, DxMODE_VLINE_ACK); - if (*disp_int & LB_D2_VBLANK_INTERRUPT) + if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) WREG32(D2MODE_VBLANK_STATUS, DxMODE_VBLANK_ACK); - if (*disp_int & LB_D2_VLINE_INTERRUPT) + if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) WREG32(D2MODE_VLINE_STATUS, DxMODE_VLINE_ACK); - if (*disp_int & DC_HPD1_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { if (ASIC_IS_DCE3(rdev)) { tmp = RREG32(DC_HPD1_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; @@ -3135,7 +3145,7 @@ static inline void r600_irq_ack(struct radeon_device *rdev, WREG32(DC_HOT_PLUG_DETECT1_INT_CONTROL, tmp); } } - if (*disp_int & DC_HPD2_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { if (ASIC_IS_DCE3(rdev)) { tmp = RREG32(DC_HPD2_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; @@ -3146,7 +3156,7 @@ static inline void r600_irq_ack(struct radeon_device *rdev, WREG32(DC_HOT_PLUG_DETECT2_INT_CONTROL, tmp); } } - if (*disp_int_cont & DC_HPD3_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { if (ASIC_IS_DCE3(rdev)) { tmp = RREG32(DC_HPD3_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; @@ -3157,18 +3167,18 @@ static inline void r600_irq_ack(struct radeon_device *rdev, WREG32(DC_HOT_PLUG_DETECT3_INT_CONTROL, tmp); } } - if (*disp_int_cont & DC_HPD4_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { tmp = RREG32(DC_HPD4_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD4_INT_CONTROL, tmp); } if (ASIC_IS_DCE32(rdev)) { - if (*disp_int_cont2 & DC_HPD5_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { tmp = RREG32(DC_HPD5_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD5_INT_CONTROL, tmp); } - if (*disp_int_cont2 & DC_HPD6_INTERRUPT) { + if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { tmp = RREG32(DC_HPD5_INT_CONTROL); tmp |= DC_HPDx_INT_ACK; WREG32(DC_HPD6_INT_CONTROL, tmp); @@ -3190,12 +3200,10 @@ static inline void r600_irq_ack(struct radeon_device *rdev, void r600_irq_disable(struct radeon_device *rdev) { - u32 disp_int, disp_int_cont, disp_int_cont2; - r600_disable_interrupts(rdev); /* Wait and acknowledge irq */ mdelay(1); - r600_irq_ack(rdev, &disp_int, &disp_int_cont, &disp_int_cont2); + r600_irq_ack(rdev); r600_disable_interrupt_state(rdev); } @@ -3258,7 +3266,7 @@ int r600_irq_process(struct radeon_device *rdev) u32 wptr = r600_get_ih_wptr(rdev); u32 rptr = rdev->ih.rptr; u32 src_id, src_data; - u32 ring_index, disp_int, disp_int_cont, disp_int_cont2; + u32 ring_index; unsigned long flags; bool queue_hotplug = false; @@ -3279,7 +3287,7 @@ int r600_irq_process(struct radeon_device *rdev) restart_ih: /* display interrupts */ - r600_irq_ack(rdev, &disp_int, &disp_int_cont, &disp_int_cont2); + r600_irq_ack(rdev); rdev->ih.wptr = wptr; while (rptr != wptr) { @@ -3292,17 +3300,21 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (disp_int & LB_D1_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int &= ~LB_D1_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[0]) + radeon_crtc_handle_flip(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); } break; case 1: /* D1 vline */ - if (disp_int & LB_D1_VLINE_INTERRUPT) { - disp_int &= ~LB_D1_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; DRM_DEBUG("IH: D1 vline\n"); } break; @@ -3314,17 +3326,21 @@ restart_ih: case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (disp_int & LB_D2_VBLANK_INTERRUPT) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - disp_int &= ~LB_D2_VBLANK_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[1]) + radeon_crtc_handle_flip(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); } break; case 1: /* D1 vline */ - if (disp_int & LB_D2_VLINE_INTERRUPT) { - disp_int &= ~LB_D2_VLINE_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; DRM_DEBUG("IH: D2 vline\n"); } break; @@ -3336,43 +3352,43 @@ restart_ih: case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (disp_int & DC_HPD1_INTERRUPT) { - disp_int &= ~DC_HPD1_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD1\n"); } break; case 1: - if (disp_int & DC_HPD2_INTERRUPT) { - disp_int &= ~DC_HPD2_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD2\n"); } break; case 4: - if (disp_int_cont & DC_HPD3_INTERRUPT) { - disp_int_cont &= ~DC_HPD3_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD3\n"); } break; case 5: - if (disp_int_cont & DC_HPD4_INTERRUPT) { - disp_int_cont &= ~DC_HPD4_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD4\n"); } break; case 10: - if (disp_int_cont2 & DC_HPD5_INTERRUPT) { - disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD5\n"); } break; case 12: - if (disp_int_cont2 & DC_HPD6_INTERRUPT) { - disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; queue_hotplug = true; DRM_DEBUG("IH: HPD6\n"); } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index bff4dc4f410f..c89cfa8e0c05 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -728,6 +728,15 @@ /* DCE 3.2 */ # define DC_HPDx_EN (1 << 28) +#define D1GRPH_INTERRUPT_STATUS 0x6158 +#define D2GRPH_INTERRUPT_STATUS 0x6958 +# define DxGRPH_PFLIP_INT_OCCURRED (1 << 0) +# define DxGRPH_PFLIP_INT_CLEAR (1 << 8) +#define D1GRPH_INTERRUPT_CONTROL 0x615c +#define D2GRPH_INTERRUPT_CONTROL 0x695c +# define DxGRPH_PFLIP_INT_MASK (1 << 0) +# define DxGRPH_PFLIP_INT_TYPE (1 << 8) + /* * PM4 */ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0507ee7e16eb..431d4186ddf0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -69,6 +69,7 @@ #include <ttm/ttm_bo_driver.h> #include <ttm/ttm_placement.h> #include <ttm/ttm_module.h> +#include <ttm/ttm_execbuf_util.h> #include "radeon_family.h" #include "radeon_mode.h" @@ -260,13 +261,12 @@ struct radeon_bo { }; struct radeon_bo_list { - struct list_head list; + struct ttm_validate_buffer tv; struct radeon_bo *bo; uint64_t gpu_offset; unsigned rdomain; unsigned wdomain; u32 tiling_flags; - bool reserved; }; /* @@ -378,11 +378,56 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg); /* * IRQS. */ + +struct radeon_unpin_work { + struct work_struct work; + struct radeon_device *rdev; + int crtc_id; + struct radeon_fence *fence; + struct drm_pending_vblank_event *event; + struct radeon_bo *old_rbo; + u64 new_crtc_base; +}; + +struct r500_irq_stat_regs { + u32 disp_int; +}; + +struct r600_irq_stat_regs { + u32 disp_int; + u32 disp_int_cont; + u32 disp_int_cont2; + u32 d1grph_int; + u32 d2grph_int; +}; + +struct evergreen_irq_stat_regs { + u32 disp_int; + u32 disp_int_cont; + u32 disp_int_cont2; + u32 disp_int_cont3; + u32 disp_int_cont4; + u32 disp_int_cont5; + u32 d1grph_int; + u32 d2grph_int; + u32 d3grph_int; + u32 d4grph_int; + u32 d5grph_int; + u32 d6grph_int; +}; + +union radeon_irq_stat_regs { + struct r500_irq_stat_regs r500; + struct r600_irq_stat_regs r600; + struct evergreen_irq_stat_regs evergreen; +}; + struct radeon_irq { bool installed; bool sw_int; /* FIXME: use a define max crtc rather than hardcode it */ bool crtc_vblank_int[6]; + bool pflip[6]; wait_queue_head_t vblank_queue; /* FIXME: use defines for max hpd/dacs */ bool hpd[6]; @@ -393,12 +438,17 @@ struct radeon_irq { bool hdmi[2]; spinlock_t sw_lock; int sw_refcount; + union radeon_irq_stat_regs stat_regs; + spinlock_t pflip_lock[6]; + int pflip_refcount[6]; }; int radeon_irq_kms_init(struct radeon_device *rdev); void radeon_irq_kms_fini(struct radeon_device *rdev); void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev); +void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); /* * CP & ring. @@ -883,6 +933,10 @@ struct radeon_asic { void (*pm_finish)(struct radeon_device *rdev); void (*pm_init_profile)(struct radeon_device *rdev); void (*pm_get_dynpm_state)(struct radeon_device *rdev); + /* pageflipping */ + void (*pre_page_flip)(struct radeon_device *rdev, int crtc); + u32 (*page_flip)(struct radeon_device *rdev, int crtc, u64 crtc_base); + void (*post_page_flip)(struct radeon_device *rdev, int crtc); }; /* @@ -1347,6 +1401,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_pm_finish(rdev) (rdev)->asic->pm_finish((rdev)) #define radeon_pm_init_profile(rdev) (rdev)->asic->pm_init_profile((rdev)) #define radeon_pm_get_dynpm_state(rdev) (rdev)->asic->pm_get_dynpm_state((rdev)) +#define radeon_pre_page_flip(rdev, crtc) rdev->asic->pre_page_flip((rdev), (crtc)) +#define radeon_page_flip(rdev, crtc, base) rdev->asic->page_flip((rdev), (crtc), (base)) +#define radeon_post_page_flip(rdev, crtc) rdev->asic->post_page_flip((rdev), (crtc)) /* Common functions */ /* AGP */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index de7bfbcd09c9..3d73fe484f42 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -171,6 +171,9 @@ static struct radeon_asic r100_asic = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r100_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic r200_asic = { @@ -215,6 +218,9 @@ static struct radeon_asic r200_asic = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r100_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic r300_asic = { @@ -260,6 +266,9 @@ static struct radeon_asic r300_asic = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r100_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic r300_asic_pcie = { @@ -304,6 +313,9 @@ static struct radeon_asic r300_asic_pcie = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r100_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic r420_asic = { @@ -349,6 +361,9 @@ static struct radeon_asic r420_asic = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r420_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic rs400_asic = { @@ -394,6 +409,9 @@ static struct radeon_asic rs400_asic = { .pm_finish = &r100_pm_finish, .pm_init_profile = &r100_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &r100_pre_page_flip, + .page_flip = &r100_page_flip, + .post_page_flip = &r100_post_page_flip, }; static struct radeon_asic rs600_asic = { @@ -439,6 +457,9 @@ static struct radeon_asic rs600_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r420_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic rs690_asic = { @@ -484,6 +505,9 @@ static struct radeon_asic rs690_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r420_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic rv515_asic = { @@ -529,6 +553,9 @@ static struct radeon_asic rv515_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r420_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic r520_asic = { @@ -574,6 +601,9 @@ static struct radeon_asic r520_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r420_pm_init_profile, .pm_get_dynpm_state = &r100_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic r600_asic = { @@ -618,6 +648,9 @@ static struct radeon_asic r600_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r600_pm_init_profile, .pm_get_dynpm_state = &r600_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic rs780_asic = { @@ -662,6 +695,9 @@ static struct radeon_asic rs780_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &rs780_pm_init_profile, .pm_get_dynpm_state = &r600_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rs600_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic rv770_asic = { @@ -706,6 +742,9 @@ static struct radeon_asic rv770_asic = { .pm_finish = &rs600_pm_finish, .pm_init_profile = &r600_pm_init_profile, .pm_get_dynpm_state = &r600_pm_get_dynpm_state, + .pre_page_flip = &rs600_pre_page_flip, + .page_flip = &rv770_page_flip, + .post_page_flip = &rs600_post_page_flip, }; static struct radeon_asic evergreen_asic = { @@ -749,6 +788,9 @@ static struct radeon_asic evergreen_asic = { .pm_finish = &evergreen_pm_finish, .pm_init_profile = &r600_pm_init_profile, .pm_get_dynpm_state = &r600_pm_get_dynpm_state, + .pre_page_flip = &evergreen_pre_page_flip, + .page_flip = &evergreen_page_flip, + .post_page_flip = &evergreen_post_page_flip, }; static struct radeon_asic sumo_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 740988244143..4970eda1bd41 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -130,6 +130,9 @@ extern void r100_pm_prepare(struct radeon_device *rdev); extern void r100_pm_finish(struct radeon_device *rdev); extern void r100_pm_init_profile(struct radeon_device *rdev); extern void r100_pm_get_dynpm_state(struct radeon_device *rdev); +extern void r100_pre_page_flip(struct radeon_device *rdev, int crtc); +extern u32 r100_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base); +extern void r100_post_page_flip(struct radeon_device *rdev, int crtc); /* * r200,rv250,rs300,rv280 @@ -205,6 +208,9 @@ void rs600_hpd_set_polarity(struct radeon_device *rdev, extern void rs600_pm_misc(struct radeon_device *rdev); extern void rs600_pm_prepare(struct radeon_device *rdev); extern void rs600_pm_finish(struct radeon_device *rdev); +extern void rs600_pre_page_flip(struct radeon_device *rdev, int crtc); +extern u32 rs600_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base); +extern void rs600_post_page_flip(struct radeon_device *rdev, int crtc); /* * rs690,rs740 @@ -287,6 +293,7 @@ void rv770_fini(struct radeon_device *rdev); int rv770_suspend(struct radeon_device *rdev); int rv770_resume(struct radeon_device *rdev); extern void rv770_pm_misc(struct radeon_device *rdev); +extern u32 rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base); /* * evergreen @@ -314,5 +321,8 @@ extern int evergreen_cs_parse(struct radeon_cs_parser *p); extern void evergreen_pm_misc(struct radeon_device *rdev); extern void evergreen_pm_prepare(struct radeon_device *rdev); extern void evergreen_pm_finish(struct radeon_device *rdev); +extern void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc); +extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base); +extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc); #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 6d64a2705f12..35b5eb8fbe2a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -77,13 +77,13 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = p->relocs[i].gobj->driver_private; p->relocs[i].lobj.bo = p->relocs[i].robj; - p->relocs[i].lobj.rdomain = r->read_domains; p->relocs[i].lobj.wdomain = r->write_domain; + p->relocs[i].lobj.rdomain = r->read_domains; + p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; p->relocs[i].flags = r->flags; - INIT_LIST_HEAD(&p->relocs[i].lobj.list); radeon_bo_list_add_object(&p->relocs[i].lobj, - &p->validated); + &p->validated); } } return radeon_bo_list_validate(&p->validated); @@ -189,10 +189,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error && parser->ib) { - radeon_bo_list_fence(&parser->validated, parser->ib->fence); - } - radeon_bo_list_unreserve(&parser->validated); + + if (!error && parser->ib) + ttm_eu_fence_buffer_objects(&parser->validated, + parser->ib->fence); + else + ttm_eu_backoff_reservation(&parser->validated); + if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { if (parser->relocs[i].gobj) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 2697801e36e7..7b17e639ab32 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -183,12 +183,272 @@ static void radeon_crtc_destroy(struct drm_crtc *crtc) kfree(radeon_crtc); } +/* + * Handle unpin events outside the interrupt handler proper. + */ +static void radeon_unpin_work_func(struct work_struct *__work) +{ + struct radeon_unpin_work *work = + container_of(__work, struct radeon_unpin_work, work); + int r; + + /* unpin of the old buffer */ + r = radeon_bo_reserve(work->old_rbo, false); + if (likely(r == 0)) { + r = radeon_bo_unpin(work->old_rbo); + if (unlikely(r != 0)) { + DRM_ERROR("failed to unpin buffer after flip\n"); + } + radeon_bo_unreserve(work->old_rbo); + } else + DRM_ERROR("failed to reserve buffer after flip\n"); + kfree(work); +} + +void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + struct radeon_unpin_work *work; + struct drm_pending_vblank_event *e; + struct timeval now; + unsigned long flags; + u32 update_pending; + int vpos, hpos; + + spin_lock_irqsave(&rdev->ddev->event_lock, flags); + work = radeon_crtc->unpin_work; + if (work == NULL || + !radeon_fence_signaled(work->fence)) { + spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + return; + } + /* New pageflip, or just completion of a previous one? */ + if (!radeon_crtc->deferred_flip_completion) { + /* do the flip (mmio) */ + update_pending = radeon_page_flip(rdev, crtc_id, work->new_crtc_base); + } else { + /* This is just a completion of a flip queued in crtc + * at last invocation. Make sure we go directly to + * completion routine. + */ + update_pending = 0; + radeon_crtc->deferred_flip_completion = 0; + } + + /* Has the pageflip already completed in crtc, or is it certain + * to complete in this vblank? + */ + if (update_pending && + (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + &vpos, &hpos)) && + (vpos >=0) && + (vpos < (99 * rdev->mode_info.crtcs[crtc_id]->base.hwmode.crtc_vdisplay)/100)) { + /* crtc didn't flip in this target vblank interval, + * but flip is pending in crtc. It will complete it + * in next vblank interval, so complete the flip at + * next vblank irq. + */ + radeon_crtc->deferred_flip_completion = 1; + spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + return; + } + + /* Pageflip (will be) certainly completed in this vblank. Clean up. */ + radeon_crtc->unpin_work = NULL; + + /* wakeup userspace */ + if (work->event) { + e = work->event; + e->event.sequence = drm_vblank_count_and_time(rdev->ddev, crtc_id, &now); + e->event.tv_sec = now.tv_sec; + e->event.tv_usec = now.tv_usec; + list_add_tail(&e->base.link, &e->base.file_priv->event_list); + wake_up_interruptible(&e->base.file_priv->event_wait); + } + spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + + drm_vblank_put(rdev->ddev, radeon_crtc->crtc_id); + radeon_fence_unref(&work->fence); + radeon_post_page_flip(work->rdev, work->crtc_id); + schedule_work(&work->work); +} + +static int radeon_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event) +{ + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_framebuffer *old_radeon_fb; + struct radeon_framebuffer *new_radeon_fb; + struct drm_gem_object *obj; + struct radeon_bo *rbo; + struct radeon_fence *fence; + struct radeon_unpin_work *work; + unsigned long flags; + u32 tiling_flags, pitch_pixels; + u64 base; + int r; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (work == NULL) + return -ENOMEM; + + r = radeon_fence_create(rdev, &fence); + if (unlikely(r != 0)) { + kfree(work); + DRM_ERROR("flip queue: failed to create fence.\n"); + return -ENOMEM; + } + work->event = event; + work->rdev = rdev; + work->crtc_id = radeon_crtc->crtc_id; + work->fence = radeon_fence_ref(fence); + old_radeon_fb = to_radeon_framebuffer(crtc->fb); + new_radeon_fb = to_radeon_framebuffer(fb); + /* schedule unpin of the old buffer */ + obj = old_radeon_fb->obj; + rbo = obj->driver_private; + work->old_rbo = rbo; + INIT_WORK(&work->work, radeon_unpin_work_func); + + /* We borrow the event spin lock for protecting unpin_work */ + spin_lock_irqsave(&dev->event_lock, flags); + if (radeon_crtc->unpin_work) { + spin_unlock_irqrestore(&dev->event_lock, flags); + kfree(work); + radeon_fence_unref(&fence); + + DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); + return -EBUSY; + } + radeon_crtc->unpin_work = work; + radeon_crtc->deferred_flip_completion = 0; + spin_unlock_irqrestore(&dev->event_lock, flags); + + /* pin the new buffer */ + obj = new_radeon_fb->obj; + rbo = obj->driver_private; + + DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = %p\n", + work->old_rbo, rbo); + + r = radeon_bo_reserve(rbo, false); + if (unlikely(r != 0)) { + DRM_ERROR("failed to reserve new rbo buffer before flip\n"); + goto pflip_cleanup; + } + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &base); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + r = -EINVAL; + DRM_ERROR("failed to pin new rbo buffer before flip\n"); + goto pflip_cleanup; + } + radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); + radeon_bo_unreserve(rbo); + + if (!ASIC_IS_AVIVO(rdev)) { + /* crtc offset is from display base addr not FB location */ + base -= radeon_crtc->legacy_display_base_addr; + pitch_pixels = fb->pitch / (fb->bits_per_pixel / 8); + + if (tiling_flags & RADEON_TILING_MACRO) { + if (ASIC_IS_R300(rdev)) { + base &= ~0x7ff; + } else { + int byteshift = fb->bits_per_pixel >> 4; + int tile_addr = (((crtc->y >> 3) * pitch_pixels + crtc->x) >> (8 - byteshift)) << 11; + base += tile_addr + ((crtc->x << byteshift) % 256) + ((crtc->y % 8) << 8); + } + } else { + int offset = crtc->y * pitch_pixels + crtc->x; + switch (fb->bits_per_pixel) { + case 8: + default: + offset *= 1; + break; + case 15: + case 16: + offset *= 2; + break; + case 24: + offset *= 3; + break; + case 32: + offset *= 4; + break; + } + base += offset; + } + base &= ~7; + } + + spin_lock_irqsave(&dev->event_lock, flags); + work->new_crtc_base = base; + spin_unlock_irqrestore(&dev->event_lock, flags); + + /* update crtc fb */ + crtc->fb = fb; + + r = drm_vblank_get(dev, radeon_crtc->crtc_id); + if (r) { + DRM_ERROR("failed to get vblank before flip\n"); + goto pflip_cleanup1; + } + + /* 32 ought to cover us */ + r = radeon_ring_lock(rdev, 32); + if (r) { + DRM_ERROR("failed to lock the ring before flip\n"); + goto pflip_cleanup2; + } + + /* emit the fence */ + radeon_fence_emit(rdev, fence); + /* set the proper interrupt */ + radeon_pre_page_flip(rdev, radeon_crtc->crtc_id); + /* fire the ring */ + radeon_ring_unlock_commit(rdev); + + return 0; + +pflip_cleanup2: + drm_vblank_put(dev, radeon_crtc->crtc_id); + +pflip_cleanup1: + r = radeon_bo_reserve(rbo, false); + if (unlikely(r != 0)) { + DRM_ERROR("failed to reserve new rbo in error path\n"); + goto pflip_cleanup; + } + r = radeon_bo_unpin(rbo); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + r = -EINVAL; + DRM_ERROR("failed to unpin new rbo in error path\n"); + goto pflip_cleanup; + } + radeon_bo_unreserve(rbo); + +pflip_cleanup: + spin_lock_irqsave(&dev->event_lock, flags); + radeon_crtc->unpin_work = NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); + radeon_fence_unref(&fence); + kfree(work); + + return r; +} + static const struct drm_crtc_funcs radeon_crtc_funcs = { .cursor_set = radeon_crtc_cursor_set, .cursor_move = radeon_crtc_cursor_move, .gamma_set = radeon_crtc_gamma_set, .set_config = drm_crtc_helper_set_config, .destroy = radeon_crtc_destroy, + .page_flip = radeon_crtc_page_flip, }; static void radeon_crtc_init(struct drm_device *dev, int index) @@ -1021,7 +1281,7 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, /* * Retrieve current video scanout position of crtc on a given gpu. * - * \param rdev Device to query. + * \param dev Device to query. * \param crtc Crtc to query. * \param *vpos Location where vertical scanout position should be stored. * \param *hpos Location where horizontal scanout position should go. @@ -1033,72 +1293,74 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * * \return Flags, or'ed together as follows: * - * RADEON_SCANOUTPOS_VALID = Query successfull. - * RADEON_SCANOUTPOS_INVBL = Inside vblank. - * RADEON_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of + * DRM_SCANOUTPOS_VALID = Query successfull. + * DRM_SCANOUTPOS_INVBL = Inside vblank. + * DRM_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of * this flag means that returned position may be offset by a constant but * unknown small number of scanlines wrt. real scanout position. * */ -int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, int *hpos) +int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, int *vpos, int *hpos) { u32 stat_crtc = 0, vbl = 0, position = 0; int vbl_start, vbl_end, vtotal, ret = 0; bool in_vbl = true; + struct radeon_device *rdev = dev->dev_private; + if (ASIC_IS_DCE4(rdev)) { if (crtc == 0) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC0_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 1) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC1_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 2) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC2_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 3) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC3_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 4) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC4_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 5) { vbl = RREG32(EVERGREEN_CRTC_V_BLANK_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET); position = RREG32(EVERGREEN_CRTC_STATUS_POSITION + EVERGREEN_CRTC5_REGISTER_OFFSET); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } } else if (ASIC_IS_AVIVO(rdev)) { if (crtc == 0) { vbl = RREG32(AVIVO_D1CRTC_V_BLANK_START_END); position = RREG32(AVIVO_D1CRTC_STATUS_POSITION); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 1) { vbl = RREG32(AVIVO_D2CRTC_V_BLANK_START_END); position = RREG32(AVIVO_D2CRTC_STATUS_POSITION); - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } } else { /* Pre-AVIVO: Different encoding of scanout pos and vblank interval. */ @@ -1114,7 +1376,7 @@ int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, if (!(stat_crtc & 1)) in_vbl = false; - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } if (crtc == 1) { vbl = (RREG32(RADEON_CRTC2_V_TOTAL_DISP) & @@ -1124,7 +1386,7 @@ int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, if (!(stat_crtc & 1)) in_vbl = false; - ret |= RADEON_SCANOUTPOS_VALID; + ret |= DRM_SCANOUTPOS_VALID; } } @@ -1135,13 +1397,13 @@ int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, /* Valid vblank area boundaries from gpu retrieved? */ if (vbl > 0) { /* Yes: Decode. */ - ret |= RADEON_SCANOUTPOS_ACCURATE; + ret |= DRM_SCANOUTPOS_ACCURATE; vbl_start = vbl & 0x1fff; vbl_end = (vbl >> 16) & 0x1fff; } else { /* No: Fake something reasonable which gives at least ok results. */ - vbl_start = rdev->mode_info.crtcs[crtc]->base.mode.crtc_vdisplay; + vbl_start = rdev->mode_info.crtcs[crtc]->base.hwmode.crtc_vdisplay; vbl_end = 0; } @@ -1157,7 +1419,7 @@ int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, /* Inside "upper part" of vblank area? Apply corrective offset if so: */ if (in_vbl && (*vpos >= vbl_start)) { - vtotal = rdev->mode_info.crtcs[crtc]->base.mode.crtc_vtotal; + vtotal = rdev->mode_info.crtcs[crtc]->base.hwmode.crtc_vtotal; *vpos = *vpos - vtotal; } @@ -1166,7 +1428,7 @@ int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, /* In vblank? */ if (in_vbl) - ret |= RADEON_SCANOUTPOS_INVBL; + ret |= DRM_SCANOUTPOS_INVBL; return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88e4ea925900..a92d2a5cea90 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -48,9 +48,10 @@ * - 2.5.0 - add get accel 2 to work around ddx breakage for evergreen * - 2.6.0 - add tiling config query (r6xx+), add initial HiZ support (r300->r500) * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs + * 2.8.0 - pageflip support */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 7 +#define KMS_DRIVER_MINOR 8 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); @@ -66,6 +67,10 @@ int radeon_resume_kms(struct drm_device *dev); u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc); int radeon_enable_vblank_kms(struct drm_device *dev, int crtc); void radeon_disable_vblank_kms(struct drm_device *dev, int crtc); +int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, + int *max_error, + struct timeval *vblank_time, + unsigned flags); void radeon_driver_irq_preinstall_kms(struct drm_device *dev); int radeon_driver_irq_postinstall_kms(struct drm_device *dev); void radeon_driver_irq_uninstall_kms(struct drm_device *dev); @@ -74,6 +79,8 @@ int radeon_dma_ioctl_kms(struct drm_device *dev, void *data, struct drm_file *file_priv); int radeon_gem_object_init(struct drm_gem_object *obj); void radeon_gem_object_free(struct drm_gem_object *obj); +extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, + int *vpos, int *hpos); extern struct drm_ioctl_desc radeon_ioctls_kms[]; extern int radeon_max_kms_ioctl; int radeon_mmap(struct file *filp, struct vm_area_struct *vma); @@ -277,6 +284,8 @@ static struct drm_driver kms_driver = { .get_vblank_counter = radeon_get_vblank_counter_kms, .enable_vblank = radeon_enable_vblank_kms, .disable_vblank = radeon_disable_vblank_kms, + .get_vblank_timestamp = radeon_get_vblank_timestamp_kms, + .get_scanout_position = radeon_get_crtc_scanoutpos, #if defined(CONFIG_DEBUG_FS) .debugfs_init = radeon_debugfs_init, .debugfs_cleanup = radeon_debugfs_cleanup, diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index b492b7dc63ac..c6861bb751ad 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -71,8 +71,10 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) rdev->irq.gui_idle = false; for (i = 0; i < rdev->num_crtc; i++) rdev->irq.crtc_vblank_int[i] = false; - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { rdev->irq.hpd[i] = false; + rdev->irq.pflip[i] = false; + } radeon_irq_set(rdev); /* Clear bits */ radeon_irq_process(rdev); @@ -101,8 +103,10 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) rdev->irq.gui_idle = false; for (i = 0; i < rdev->num_crtc; i++) rdev->irq.crtc_vblank_int[i] = false; - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { rdev->irq.hpd[i] = false; + rdev->irq.pflip[i] = false; + } radeon_irq_set(rdev); } @@ -175,3 +179,34 @@ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev) spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } +void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) +{ + unsigned long irqflags; + + if (crtc < 0 || crtc >= rdev->num_crtc) + return; + + spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { + rdev->irq.pflip[crtc] = true; + radeon_irq_set(rdev); + } + spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); +} + +void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) +{ + unsigned long irqflags; + + if (crtc < 0 || crtc >= rdev->num_crtc) + return; + + spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); + if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { + rdev->irq.pflip[crtc] = false; + radeon_irq_set(rdev); + } + spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); +} + diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8fbbe1c6ebbd..4bf423ca4c12 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -277,6 +277,27 @@ void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) radeon_irq_set(rdev); } +int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, + int *max_error, + struct timeval *vblank_time, + unsigned flags) +{ + struct drm_crtc *drmcrtc; + struct radeon_device *rdev = dev->dev_private; + + if (crtc < 0 || crtc >= dev->num_crtcs) { + DRM_ERROR("Invalid crtc %d\n", crtc); + return -EINVAL; + } + + /* Get associated drm_crtc: */ + drmcrtc = &rdev->mode_info.crtcs[crtc]->base; + + /* Helper routine in DRM core does all the work: */ + return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error, + vblank_time, flags, + drmcrtc); +} /* * IOCTL. diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e301c6f9e059..f406f02bf14e 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -277,6 +277,9 @@ struct radeon_crtc { fixed20_12 hsc; struct drm_display_mode native_mode; int pll_id; + /* page flipping */ + struct radeon_unpin_work *unpin_work; + int deferred_flip_completion; }; struct radeon_encoder_primary_dac { @@ -442,10 +445,6 @@ struct radeon_framebuffer { struct drm_gem_object *obj; }; -/* radeon_get_crtc_scanoutpos() return flags */ -#define RADEON_SCANOUTPOS_VALID (1 << 0) -#define RADEON_SCANOUTPOS_INVBL (1 << 1) -#define RADEON_SCANOUTPOS_ACCURATE (1 << 2) extern enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev); @@ -562,7 +561,8 @@ extern int radeon_crtc_cursor_set(struct drm_crtc *crtc, extern int radeon_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); -extern int radeon_get_crtc_scanoutpos(struct radeon_device *rdev, int crtc, int *vpos, int *hpos); +extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, + int *vpos, int *hpos); extern bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev); extern struct edid * @@ -662,4 +662,7 @@ int radeon_fbdev_total_size(struct radeon_device *rdev); bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj); void radeon_fb_output_poll_changed(struct radeon_device *rdev); + +void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1d067743fee0..a8594d289bcf 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -293,34 +293,9 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head) { if (lobj->wdomain) { - list_add(&lobj->list, head); + list_add(&lobj->tv.head, head); } else { - list_add_tail(&lobj->list, head); - } -} - -int radeon_bo_list_reserve(struct list_head *head) -{ - struct radeon_bo_list *lobj; - int r; - - list_for_each_entry(lobj, head, list){ - r = radeon_bo_reserve(lobj->bo, false); - if (unlikely(r != 0)) - return r; - lobj->reserved = true; - } - return 0; -} - -void radeon_bo_list_unreserve(struct list_head *head) -{ - struct radeon_bo_list *lobj; - - list_for_each_entry(lobj, head, list) { - /* only unreserve object we successfully reserved */ - if (lobj->reserved && radeon_bo_is_reserved(lobj->bo)) - radeon_bo_unreserve(lobj->bo); + list_add_tail(&lobj->tv.head, head); } } @@ -331,14 +306,11 @@ int radeon_bo_list_validate(struct list_head *head) u32 domain; int r; - list_for_each_entry(lobj, head, list) { - lobj->reserved = false; - } - r = radeon_bo_list_reserve(head); + r = ttm_eu_reserve_buffers(head); if (unlikely(r != 0)) { return r; } - list_for_each_entry(lobj, head, list) { + list_for_each_entry(lobj, head, tv.head) { bo = lobj->bo; if (!bo->pin_count) { domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; @@ -361,25 +333,6 @@ int radeon_bo_list_validate(struct list_head *head) return 0; } -void radeon_bo_list_fence(struct list_head *head, void *fence) -{ - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - struct radeon_fence *old_fence = NULL; - - list_for_each_entry(lobj, head, list) { - bo = lobj->bo; - spin_lock(&bo->tbo.lock); - old_fence = (struct radeon_fence *)bo->tbo.sync_obj; - bo->tbo.sync_obj = radeon_fence_ref(fence); - bo->tbo.sync_obj_arg = NULL; - spin_unlock(&bo->tbo.lock); - if (old_fence) { - radeon_fence_unref(&old_fence); - } - } -} - int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma) { diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index d143702b244a..22d4c237dea5 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.lock); + spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.lock); + spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } @@ -152,10 +152,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, struct list_head *head); -extern int radeon_bo_list_reserve(struct list_head *head); -extern void radeon_bo_list_unreserve(struct list_head *head); extern int radeon_bo_list_validate(struct list_head *head); -extern void radeon_bo_list_fence(struct list_head *head, void *fence); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index ac4efb0f1cb4..4de7776bd1c5 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -724,9 +724,9 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev) */ for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { if (rdev->pm.active_crtcs & (1 << crtc)) { - vbl_status = radeon_get_crtc_scanoutpos(rdev, crtc, &vpos, &hpos); - if ((vbl_status & RADEON_SCANOUTPOS_VALID) && - !(vbl_status & RADEON_SCANOUTPOS_INVBL)) + vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, crtc, &vpos, &hpos); + if ((vbl_status & DRM_SCANOUTPOS_VALID) && + !(vbl_status & DRM_SCANOUTPOS_INVBL)) in_vbl = false; } } diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 64928814de53..0a310b7f71c3 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -422,6 +422,7 @@ # define RADEON_CRTC_CSYNC_EN (1 << 4) # define RADEON_CRTC_ICON_EN (1 << 15) # define RADEON_CRTC_CUR_EN (1 << 16) +# define RADEON_CRTC_VSTAT_MODE_MASK (3 << 17) # define RADEON_CRTC_CUR_MODE_MASK (7 << 20) # define RADEON_CRTC_CUR_MODE_SHIFT 20 # define RADEON_CRTC_CUR_MODE_MONO 0 @@ -509,6 +510,8 @@ # define RADEON_CRTC_TILE_EN (1 << 15) # define RADEON_CRTC_OFFSET_FLIP_CNTL (1 << 16) # define RADEON_CRTC_STEREO_OFFSET_EN (1 << 17) +# define RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN (1 << 28) +# define RADEON_CRTC_GUI_TRIG_OFFSET_RIGHT_EN (1 << 29) #define R300_CRTC_TILE_X0_Y0 0x0350 #define R300_CRTC2_TILE_X0_Y0 0x0358 diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index f1c6e02c2e6b..9a85b1614c86 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -46,6 +46,56 @@ void rs600_gpu_init(struct radeon_device *rdev); int rs600_mc_wait_for_idle(struct radeon_device *rdev); +void rs600_pre_page_flip(struct radeon_device *rdev, int crtc) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; + u32 tmp; + + /* make sure flip is at vb rather than hb */ + tmp = RREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset); + tmp &= ~AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN; + WREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); + + /* set pageflip to happen anywhere in vblank interval */ + WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + + /* enable the pflip int */ + radeon_irq_kms_pflip_irq_get(rdev, crtc); +} + +void rs600_post_page_flip(struct radeon_device *rdev, int crtc) +{ + /* disable the pflip int */ + radeon_irq_kms_pflip_irq_put(rdev, crtc); +} + +u32 rs600_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); + + /* Lock the graphics update lock */ + tmp |= AVIVO_D1GRPH_UPDATE_LOCK; + WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* update the scanout addresses */ + WREG32(AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + WREG32(AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + + /* Wait for update_pending to go high. */ + while (!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)); + DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); + + /* Unlock the lock, so double-buffering can take place inside vblank */ + tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK; + WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* Return current update_pending status: */ + return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING; +} + void rs600_pm_misc(struct radeon_device *rdev) { int requested_index = rdev->pm.requested_power_state_index; @@ -515,10 +565,12 @@ int rs600_irq_set(struct radeon_device *rdev) if (rdev->irq.gui_idle) { tmp |= S_000040_GUI_IDLE(1); } - if (rdev->irq.crtc_vblank_int[0]) { + if (rdev->irq.crtc_vblank_int[0] || + rdev->irq.pflip[0]) { mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1); } - if (rdev->irq.crtc_vblank_int[1]) { + if (rdev->irq.crtc_vblank_int[1] || + rdev->irq.pflip[1]) { mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1); } if (rdev->irq.hpd[0]) { @@ -534,7 +586,7 @@ int rs600_irq_set(struct radeon_device *rdev) return 0; } -static inline uint32_t rs600_irq_ack(struct radeon_device *rdev, u32 *r500_disp_int) +static inline u32 rs600_irq_ack(struct radeon_device *rdev) { uint32_t irqs = RREG32(R_000044_GEN_INT_STATUS); uint32_t irq_mask = S_000044_SW_INT(1); @@ -547,27 +599,27 @@ static inline uint32_t rs600_irq_ack(struct radeon_device *rdev, u32 *r500_disp_ } if (G_000044_DISPLAY_INT_STAT(irqs)) { - *r500_disp_int = RREG32(R_007EDC_DISP_INTERRUPT_STATUS); - if (G_007EDC_LB_D1_VBLANK_INTERRUPT(*r500_disp_int)) { + rdev->irq.stat_regs.r500.disp_int = RREG32(R_007EDC_DISP_INTERRUPT_STATUS); + if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { WREG32(R_006534_D1MODE_VBLANK_STATUS, S_006534_D1MODE_VBLANK_ACK(1)); } - if (G_007EDC_LB_D2_VBLANK_INTERRUPT(*r500_disp_int)) { + if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { WREG32(R_006D34_D2MODE_VBLANK_STATUS, S_006D34_D2MODE_VBLANK_ACK(1)); } - if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(*r500_disp_int)) { + if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { tmp = RREG32(R_007D08_DC_HOT_PLUG_DETECT1_INT_CONTROL); tmp |= S_007D08_DC_HOT_PLUG_DETECT1_INT_ACK(1); WREG32(R_007D08_DC_HOT_PLUG_DETECT1_INT_CONTROL, tmp); } - if (G_007EDC_DC_HOT_PLUG_DETECT2_INTERRUPT(*r500_disp_int)) { + if (G_007EDC_DC_HOT_PLUG_DETECT2_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { tmp = RREG32(R_007D18_DC_HOT_PLUG_DETECT2_INT_CONTROL); tmp |= S_007D18_DC_HOT_PLUG_DETECT2_INT_ACK(1); WREG32(R_007D18_DC_HOT_PLUG_DETECT2_INT_CONTROL, tmp); } } else { - *r500_disp_int = 0; + rdev->irq.stat_regs.r500.disp_int = 0; } if (irqs) { @@ -578,32 +630,30 @@ static inline uint32_t rs600_irq_ack(struct radeon_device *rdev, u32 *r500_disp_ void rs600_irq_disable(struct radeon_device *rdev) { - u32 tmp; - WREG32(R_000040_GEN_INT_CNTL, 0); WREG32(R_006540_DxMODE_INT_MASK, 0); /* Wait and acknowledge irq */ mdelay(1); - rs600_irq_ack(rdev, &tmp); + rs600_irq_ack(rdev); } int rs600_irq_process(struct radeon_device *rdev) { - uint32_t status, msi_rearm; - uint32_t r500_disp_int; + u32 status, msi_rearm; bool queue_hotplug = false; /* reset gui idle ack. the status bit is broken */ rdev->irq.gui_idle_acked = false; - status = rs600_irq_ack(rdev, &r500_disp_int); - if (!status && !r500_disp_int) { + status = rs600_irq_ack(rdev); + if (!status && !rdev->irq.stat_regs.r500.disp_int) { return IRQ_NONE; } - while (status || r500_disp_int) { + while (status || rdev->irq.stat_regs.r500.disp_int) { /* SW interrupt */ - if (G_000044_SW_INT(status)) + if (G_000044_SW_INT(status)) { radeon_fence_process(rdev); + } /* GUI idle */ if (G_000040_GUI_IDLE(status)) { rdev->irq.gui_idle_acked = true; @@ -611,25 +661,33 @@ int rs600_irq_process(struct radeon_device *rdev) wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ - if (G_007EDC_LB_D1_VBLANK_INTERRUPT(r500_disp_int)) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); + if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[0]) + radeon_crtc_handle_flip(rdev, 0); } - if (G_007EDC_LB_D2_VBLANK_INTERRUPT(r500_disp_int)) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); + if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); + } + if (rdev->irq.pflip[1]) + radeon_crtc_handle_flip(rdev, 1); } - if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(r500_disp_int)) { + if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { queue_hotplug = true; DRM_DEBUG("HPD1\n"); } - if (G_007EDC_DC_HOT_PLUG_DETECT2_INTERRUPT(r500_disp_int)) { + if (G_007EDC_DC_HOT_PLUG_DETECT2_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { queue_hotplug = true; DRM_DEBUG("HPD2\n"); } - status = rs600_irq_ack(rdev, &r500_disp_int); + status = rs600_irq_ack(rdev); } /* reset gui idle ack. the status bit is broken */ rdev->irq.gui_idle_acked = false; diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 2b66af9066b2..7c2e0b19a558 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -42,6 +42,40 @@ static void rv770_gpu_init(struct radeon_device *rdev); void rv770_fini(struct radeon_device *rdev); +u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) +{ + struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); + + /* Lock the graphics update lock */ + tmp |= AVIVO_D1GRPH_UPDATE_LOCK; + WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* update the scanout addresses */ + if (radeon_crtc->crtc_id) { + WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); + WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); + } else { + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); + } + WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)crtc_base); + + /* Wait for update_pending to go high. */ + while (!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)); + DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n"); + + /* Unlock the lock, so double-buffering can take place inside vblank */ + tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK; + WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); + + /* Return current update_pending status: */ + return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING; +} + /* get temperature in millidegrees */ u32 rv770_get_temp(struct radeon_device *rdev) { diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index e09a403f1c64..98f9ad256d3d 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -354,4 +354,11 @@ #define SRBM_STATUS 0x0E50 +#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 +#define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914 +#define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114 +#define D1GRPH_SECONDARY_SURFACE_ADDRESS 0x6118 +#define D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x691c +#define D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x611c + #endif diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 148a322d8f5d..cf2ec562550e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -169,7 +169,7 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible) } EXPORT_SYMBOL(ttm_bo_wait_unreserved); -static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) +void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; struct ttm_mem_type_manager *man; @@ -191,11 +191,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) } } -/** - * Call with the lru_lock held. - */ - -static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) +int ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { int put_count = 0; @@ -227,9 +223,18 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, /** * Deadlock avoidance for multi-bo reserving. */ - if (use_sequence && bo->seq_valid && - (sequence - bo->val_seq < (1 << 31))) { - return -EAGAIN; + if (use_sequence && bo->seq_valid) { + /** + * We've already reserved this one. + */ + if (unlikely(sequence == bo->val_seq)) + return -EDEADLK; + /** + * Already reserved by a thread that will not back + * off for us. We need to back off. + */ + if (unlikely(sequence - bo->val_seq < (1 << 31))) + return -EAGAIN; } if (no_wait) @@ -267,6 +272,13 @@ static void ttm_bo_ref_bug(struct kref *list_kref) BUG(); } +void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, + bool never_free) +{ + kref_sub(&bo->list_kref, count, + (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list); +} + int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) @@ -282,20 +294,24 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return ret; } +void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo) +{ + ttm_bo_add_to_lru(bo); + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); +} + void ttm_bo_unreserve(struct ttm_buffer_object *bo) { struct ttm_bo_global *glob = bo->glob; spin_lock(&glob->lru_lock); - ttm_bo_add_to_lru(bo); - atomic_set(&bo->reserved, 0); - wake_up_all(&bo->event_queue); + ttm_bo_unreserve_locked(bo); spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_bo_unreserve); @@ -362,8 +378,13 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, int ret = 0; if (old_is_pci || new_is_pci || - ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) - ttm_bo_unmap_virtual(bo); + ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) { + ret = ttm_mem_io_lock(old_man, true); + if (unlikely(ret != 0)) + goto out_err; + ttm_bo_unmap_virtual_locked(bo); + ttm_mem_io_unlock(old_man); + } /* * Create and bind a ttm if required. @@ -416,11 +437,9 @@ moved: } if (bo->mem.mm_node) { - spin_lock(&bo->lock); bo->offset = (bo->mem.start << PAGE_SHIFT) + bdev->man[bo->mem.mem_type].gpu_offset; bo->cur_placement = bo->mem.placement; - spin_unlock(&bo->lock); } else bo->offset = 0; @@ -452,7 +471,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } - ttm_bo_mem_put(bo, &bo->mem); atomic_set(&bo->reserved, 0); @@ -474,14 +492,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) int put_count; int ret; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); (void) ttm_bo_wait(bo, false, false, true); if (!bo->sync_obj) { spin_lock(&glob->lru_lock); /** - * Lock inversion between bo::reserve and bo::lock here, + * Lock inversion between bo:reserve and bdev::fence_lock here, * but that's OK, since we're only trylocking. */ @@ -490,14 +508,13 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) if (unlikely(ret == -EBUSY)) goto queue; - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return; } else { @@ -512,7 +529,7 @@ queue: kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); spin_unlock(&glob->lru_lock); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (sync_obj) { driver->sync_obj_flush(sync_obj, sync_obj_arg); @@ -537,14 +554,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool no_wait_reserve, bool no_wait_gpu) { + struct ttm_bo_device *bdev = bo->bdev; struct ttm_bo_global *glob = bo->glob; int put_count; int ret = 0; retry: - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) return ret; @@ -580,8 +598,7 @@ retry: spin_unlock(&glob->lru_lock); ttm_bo_cleanup_memtype_use(bo); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); return 0; } @@ -652,6 +669,7 @@ static void ttm_bo_release(struct kref *kref) struct ttm_buffer_object *bo = container_of(kref, struct ttm_buffer_object, kref); struct ttm_bo_device *bdev = bo->bdev; + struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; if (likely(bo->vm_node != NULL)) { rb_erase(&bo->vm_rb, &bdev->addr_space_rb); @@ -659,6 +677,9 @@ static void ttm_bo_release(struct kref *kref) bo->vm_node = NULL; } write_unlock(&bdev->vm_lock); + ttm_mem_io_lock(man, false); + ttm_mem_io_free_vm(bo); + ttm_mem_io_unlock(man); ttm_bo_cleanup_refs_or_queue(bo); kref_put(&bo->list_kref, ttm_bo_release_list); write_lock(&bdev->vm_lock); @@ -698,9 +719,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) { @@ -715,7 +736,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, evict_mem = bo->mem; evict_mem.mm_node = NULL; - evict_mem.bus.io_reserved = false; + evict_mem.bus.io_reserved_vm = false; + evict_mem.bus.io_reserved_count = 0; placement.fpfn = 0; placement.lpfn = 0; @@ -802,8 +824,7 @@ retry: BUG_ON(ret != 0); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu); ttm_bo_unreserve(bo); @@ -1036,6 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, { int ret = 0; struct ttm_mem_reg mem; + struct ttm_bo_device *bdev = bo->bdev; BUG_ON(!atomic_read(&bo->reserved)); @@ -1044,15 +1066,16 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * Have the driver move function wait for idle when necessary, * instead of doing it here. */ - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (ret) return ret; mem.num_pages = bo->num_pages; mem.size = mem.num_pages << PAGE_SHIFT; mem.page_alignment = bo->mem.page_alignment; - mem.bus.io_reserved = false; + mem.bus.io_reserved_vm = false; + mem.bus.io_reserved_count = 0; /* * Determine where to move the buffer. */ @@ -1163,7 +1186,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev, } bo->destroy = destroy; - spin_lock_init(&bo->lock); kref_init(&bo->kref); kref_init(&bo->list_kref); atomic_set(&bo->cpu_writers, 0); @@ -1172,6 +1194,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); + INIT_LIST_HEAD(&bo->io_reserve_lru); bo->bdev = bdev; bo->glob = bdev->glob; bo->type = type; @@ -1181,7 +1204,8 @@ int ttm_bo_init(struct ttm_bo_device *bdev, bo->mem.num_pages = bo->num_pages; bo->mem.mm_node = NULL; bo->mem.page_alignment = page_alignment; - bo->mem.bus.io_reserved = false; + bo->mem.bus.io_reserved_vm = false; + bo->mem.bus.io_reserved_count = 0; bo->buffer_start = buffer_start & PAGE_MASK; bo->priv_flags = 0; bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED); @@ -1355,6 +1379,10 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, BUG_ON(type >= TTM_NUM_MEM_TYPES); man = &bdev->man[type]; BUG_ON(man->has_type); + man->io_reserve_fastpath = true; + man->use_io_reserve_lru = false; + mutex_init(&man->io_reserve_mutex); + INIT_LIST_HEAD(&man->io_reserve_lru); ret = bdev->driver->init_mem_type(bdev, type, man); if (ret) @@ -1527,7 +1555,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->dev_mapping = NULL; bdev->glob = glob; bdev->need_dma32 = need_dma32; - + bdev->val_seq = 0; + spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&glob->device_list_mutex); @@ -1561,7 +1590,7 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) return true; } -void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) +void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo) { struct ttm_bo_device *bdev = bo->bdev; loff_t offset = (loff_t) bo->addr_space_offset; @@ -1570,8 +1599,20 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) if (!bdev->dev_mapping) return; unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1); - ttm_mem_io_free(bdev, &bo->mem); + ttm_mem_io_free_vm(bo); +} + +void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) +{ + struct ttm_bo_device *bdev = bo->bdev; + struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; + + ttm_mem_io_lock(man, false); + ttm_bo_unmap_virtual_locked(bo); + ttm_mem_io_unlock(man); } + + EXPORT_SYMBOL(ttm_bo_unmap_virtual); static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo) @@ -1651,6 +1692,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, bool interruptible, bool no_wait) { struct ttm_bo_driver *driver = bo->bdev->driver; + struct ttm_bo_device *bdev = bo->bdev; void *sync_obj; void *sync_obj_arg; int ret = 0; @@ -1664,9 +1706,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, void *tmp_obj = bo->sync_obj; bo->sync_obj = NULL; clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&tmp_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); continue; } @@ -1675,29 +1717,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, sync_obj = driver->sync_obj_ref(bo->sync_obj); sync_obj_arg = bo->sync_obj_arg; - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); ret = driver->sync_obj_wait(sync_obj, sync_obj_arg, lazy, interruptible); if (unlikely(ret != 0)) { driver->sync_obj_unref(&sync_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); return ret; } - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (likely(bo->sync_obj == sync_obj && bo->sync_obj_arg == sync_obj_arg)) { void *tmp_obj = bo->sync_obj; bo->sync_obj = NULL; clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&sync_obj); driver->sync_obj_unref(&tmp_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); } else { - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); driver->sync_obj_unref(&sync_obj); - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); } } return 0; @@ -1706,6 +1748,7 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) { + struct ttm_bo_device *bdev = bo->bdev; int ret = 0; /* @@ -1715,9 +1758,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) ret = ttm_bo_reserve(bo, true, no_wait, false, 0); if (unlikely(ret != 0)) return ret; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, true, no_wait); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (likely(ret == 0)) atomic_inc(&bo->cpu_writers); ttm_bo_unreserve(bo); @@ -1783,16 +1826,15 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) put_count = ttm_bo_del_from_lru(bo); spin_unlock(&glob->lru_lock); - while (put_count--) - kref_put(&bo->list_kref, ttm_bo_ref_bug); + ttm_bo_list_ref_sub(bo, put_count, true); /** * Wait for GPU, then move to system cached. */ - spin_lock(&bo->lock); + spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->lock); + spin_unlock(&bo->bdev->fence_lock); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 3106d5bcce32..a89839f83f6c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,37 +75,123 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_move_ttm); -int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) +int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible) { - int ret; + if (likely(man->io_reserve_fastpath)) + return 0; + + if (interruptible) + return mutex_lock_interruptible(&man->io_reserve_mutex); + + mutex_lock(&man->io_reserve_mutex); + return 0; +} - if (!mem->bus.io_reserved) { - mem->bus.io_reserved = true; +void ttm_mem_io_unlock(struct ttm_mem_type_manager *man) +{ + if (likely(man->io_reserve_fastpath)) + return; + + mutex_unlock(&man->io_reserve_mutex); +} + +static int ttm_mem_io_evict(struct ttm_mem_type_manager *man) +{ + struct ttm_buffer_object *bo; + + if (!man->use_io_reserve_lru || list_empty(&man->io_reserve_lru)) + return -EAGAIN; + + bo = list_first_entry(&man->io_reserve_lru, + struct ttm_buffer_object, + io_reserve_lru); + list_del_init(&bo->io_reserve_lru); + ttm_bo_unmap_virtual_locked(bo); + + return 0; +} + +static int ttm_mem_io_reserve(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; + int ret = 0; + + if (!bdev->driver->io_mem_reserve) + return 0; + if (likely(man->io_reserve_fastpath)) + return bdev->driver->io_mem_reserve(bdev, mem); + + if (bdev->driver->io_mem_reserve && + mem->bus.io_reserved_count++ == 0) { +retry: ret = bdev->driver->io_mem_reserve(bdev, mem); + if (ret == -EAGAIN) { + ret = ttm_mem_io_evict(man); + if (ret == 0) + goto retry; + } + } + return ret; +} + +static void ttm_mem_io_free(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; + + if (likely(man->io_reserve_fastpath)) + return; + + if (bdev->driver->io_mem_reserve && + --mem->bus.io_reserved_count == 0 && + bdev->driver->io_mem_free) + bdev->driver->io_mem_free(bdev, mem); + +} + +int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo) +{ + struct ttm_mem_reg *mem = &bo->mem; + int ret; + + if (!mem->bus.io_reserved_vm) { + struct ttm_mem_type_manager *man = + &bo->bdev->man[mem->mem_type]; + + ret = ttm_mem_io_reserve(bo->bdev, mem); if (unlikely(ret != 0)) return ret; + mem->bus.io_reserved_vm = true; + if (man->use_io_reserve_lru) + list_add_tail(&bo->io_reserve_lru, + &man->io_reserve_lru); } return 0; } -void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) +void ttm_mem_io_free_vm(struct ttm_buffer_object *bo) { - if (bdev->driver->io_mem_reserve) { - if (mem->bus.io_reserved) { - mem->bus.io_reserved = false; - bdev->driver->io_mem_free(bdev, mem); - } + struct ttm_mem_reg *mem = &bo->mem; + + if (mem->bus.io_reserved_vm) { + mem->bus.io_reserved_vm = false; + list_del_init(&bo->io_reserve_lru); + ttm_mem_io_free(bo->bdev, mem); } } int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, void **virtual) { + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; int ret; void *addr; *virtual = NULL; + (void) ttm_mem_io_lock(man, false); ret = ttm_mem_io_reserve(bdev, mem); + ttm_mem_io_unlock(man); if (ret || !mem->bus.is_iomem) return ret; @@ -117,7 +203,9 @@ int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, else addr = ioremap_nocache(mem->bus.base + mem->bus.offset, mem->bus.size); if (!addr) { + (void) ttm_mem_io_lock(man, false); ttm_mem_io_free(bdev, mem); + ttm_mem_io_unlock(man); return -ENOMEM; } } @@ -134,7 +222,9 @@ void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem, if (virtual && mem->bus.addr == NULL) iounmap(virtual); + (void) ttm_mem_io_lock(man, false); ttm_mem_io_free(bdev, mem); + ttm_mem_io_unlock(man); } static int ttm_copy_io_page(void *dst, void *src, unsigned long page) @@ -231,7 +321,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type]; struct ttm_tt *ttm = bo->ttm; struct ttm_mem_reg *old_mem = &bo->mem; - struct ttm_mem_reg old_copy = *old_mem; + struct ttm_mem_reg old_copy; void *old_iomap; void *new_iomap; int ret; @@ -281,7 +371,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, mb(); out2: ttm_bo_free_old_node(bo); - + old_copy = *old_mem; *old_mem = *new_mem; new_mem->mm_node = NULL; @@ -292,7 +382,7 @@ out2: } out1: - ttm_mem_reg_iounmap(bdev, new_mem, new_iomap); + ttm_mem_reg_iounmap(bdev, old_mem, new_iomap); out: ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap); return ret; @@ -337,11 +427,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, * TODO: Explicit member copy would probably be better here. */ - spin_lock_init(&fbo->lock); init_waitqueue_head(&fbo->event_queue); INIT_LIST_HEAD(&fbo->ddestroy); INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); + INIT_LIST_HEAD(&fbo->io_reserve_lru); fbo->vm_node = NULL; atomic_set(&fbo->cpu_writers, 0); @@ -453,6 +543,8 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo, unsigned long start_page, unsigned long num_pages, struct ttm_bo_kmap_obj *map) { + struct ttm_mem_type_manager *man = + &bo->bdev->man[bo->mem.mem_type]; unsigned long offset, size; int ret; @@ -467,7 +559,9 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo, if (num_pages > 1 && !DRM_SUSER(DRM_CURPROC)) return -EPERM; #endif + (void) ttm_mem_io_lock(man, false); ret = ttm_mem_io_reserve(bo->bdev, &bo->mem); + ttm_mem_io_unlock(man); if (ret) return ret; if (!bo->mem.bus.is_iomem) { @@ -482,12 +576,15 @@ EXPORT_SYMBOL(ttm_bo_kmap); void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) { + struct ttm_buffer_object *bo = map->bo; + struct ttm_mem_type_manager *man = + &bo->bdev->man[bo->mem.mem_type]; + if (!map->virtual) return; switch (map->bo_kmap_type) { case ttm_bo_map_iomap: iounmap(map->virtual); - ttm_mem_io_free(map->bo->bdev, &map->bo->mem); break; case ttm_bo_map_vmap: vunmap(map->virtual); @@ -500,6 +597,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) default: BUG(); } + (void) ttm_mem_io_lock(man, false); + ttm_mem_io_free(map->bo->bdev, &map->bo->mem); + ttm_mem_io_unlock(man); map->virtual = NULL; map->page = NULL; } @@ -520,7 +620,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_buffer_object *ghost_obj; void *tmp_obj = NULL; - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (bo->sync_obj) { tmp_obj = bo->sync_obj; bo->sync_obj = NULL; @@ -529,7 +629,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bo->sync_obj_arg = sync_obj_arg; if (evict) { ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); if (ret) @@ -552,7 +652,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, */ set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index fe6cb77899f4..221b924acebe 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -83,6 +83,8 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int i; unsigned long address = (unsigned long)vmf->virtual_address; int retval = VM_FAULT_NOPAGE; + struct ttm_mem_type_manager *man = + &bdev->man[bo->mem.mem_type]; /* * Work around locking order reversal in fault / nopfn @@ -118,24 +120,28 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * move. */ - spin_lock(&bo->lock); + spin_lock(&bdev->fence_lock); if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { ret = ttm_bo_wait(bo, false, true, false); - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { retval = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; goto out_unlock; } } else - spin_unlock(&bo->lock); + spin_unlock(&bdev->fence_lock); - - ret = ttm_mem_io_reserve(bdev, &bo->mem); - if (ret) { - retval = VM_FAULT_SIGBUS; + ret = ttm_mem_io_lock(man, true); + if (unlikely(ret != 0)) { + retval = VM_FAULT_NOPAGE; goto out_unlock; } + ret = ttm_mem_io_reserve_vm(bo); + if (unlikely(ret != 0)) { + retval = VM_FAULT_SIGBUS; + goto out_io_unlock; + } page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + bo->vm_node->start - vma->vm_pgoff; @@ -144,7 +150,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (unlikely(page_offset >= bo->num_pages)) { retval = VM_FAULT_SIGBUS; - goto out_unlock; + goto out_io_unlock; } /* @@ -182,7 +188,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = ttm_tt_get_page(ttm, page_offset); if (unlikely(!page && i == 0)) { retval = VM_FAULT_OOM; - goto out_unlock; + goto out_io_unlock; } else if (unlikely(!page)) { break; } @@ -200,14 +206,15 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) else if (unlikely(ret != 0)) { retval = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; - goto out_unlock; + goto out_io_unlock; } address += PAGE_SIZE; if (unlikely(++page_offset >= page_last)) break; } - +out_io_unlock: + ttm_mem_io_unlock(man); out_unlock: ttm_bo_unreserve(bo); return retval; diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index c285c2902d15..3832fe10b4df 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -32,7 +32,7 @@ #include <linux/sched.h> #include <linux/module.h> -void ttm_eu_backoff_reservation(struct list_head *list) +static void ttm_eu_backoff_reservation_locked(struct list_head *list) { struct ttm_validate_buffer *entry; @@ -41,10 +41,77 @@ void ttm_eu_backoff_reservation(struct list_head *list) if (!entry->reserved) continue; + if (entry->removed) { + ttm_bo_add_to_lru(bo); + entry->removed = false; + + } entry->reserved = false; - ttm_bo_unreserve(bo); + atomic_set(&bo->reserved, 0); + wake_up_all(&bo->event_queue); + } +} + +static void ttm_eu_del_from_lru_locked(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + if (!entry->reserved) + continue; + + if (!entry->removed) { + entry->put_count = ttm_bo_del_from_lru(bo); + entry->removed = true; + } } } + +static void ttm_eu_list_ref_sub(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + if (entry->put_count) { + ttm_bo_list_ref_sub(bo, entry->put_count, true); + entry->put_count = 0; + } + } +} + +static int ttm_eu_wait_unreserved_locked(struct list_head *list, + struct ttm_buffer_object *bo) +{ + struct ttm_bo_global *glob = bo->glob; + int ret; + + ttm_eu_del_from_lru_locked(list); + spin_unlock(&glob->lru_lock); + ret = ttm_bo_wait_unreserved(bo, true); + spin_lock(&glob->lru_lock); + if (unlikely(ret != 0)) + ttm_eu_backoff_reservation_locked(list); + return ret; +} + + +void ttm_eu_backoff_reservation(struct list_head *list) +{ + struct ttm_validate_buffer *entry; + struct ttm_bo_global *glob; + + if (list_empty(list)) + return; + + entry = list_first_entry(list, struct ttm_validate_buffer, head); + glob = entry->bo->glob; + spin_lock(&glob->lru_lock); + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); +} EXPORT_SYMBOL(ttm_eu_backoff_reservation); /* @@ -59,37 +126,76 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); * buffers in different orders. */ -int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq) +int ttm_eu_reserve_buffers(struct list_head *list) { + struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; int ret; + uint32_t val_seq; + + if (list_empty(list)) + return 0; + + list_for_each_entry(entry, list, head) { + entry->reserved = false; + entry->put_count = 0; + entry->removed = false; + } + + entry = list_first_entry(list, struct ttm_validate_buffer, head); + glob = entry->bo->glob; retry: + spin_lock(&glob->lru_lock); + val_seq = entry->bo->bdev->val_seq++; + list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - entry->reserved = false; - ret = ttm_bo_reserve(bo, true, false, true, val_seq); - if (ret != 0) { - ttm_eu_backoff_reservation(list); - if (ret == -EAGAIN) { - ret = ttm_bo_wait_unreserved(bo, true); - if (unlikely(ret != 0)) - return ret; - goto retry; - } else +retry_this_bo: + ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq); + switch (ret) { + case 0: + break; + case -EBUSY: + ret = ttm_eu_wait_unreserved_locked(list, bo); + if (unlikely(ret != 0)) { + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); return ret; + } + goto retry_this_bo; + case -EAGAIN: + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + ret = ttm_bo_wait_unreserved(bo, true); + if (unlikely(ret != 0)) + return ret; + goto retry; + default: + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + return ret; } entry->reserved = true; if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { - ttm_eu_backoff_reservation(list); + ttm_eu_backoff_reservation_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); ret = ttm_bo_wait_cpu(bo, false); if (ret) return ret; goto retry; } } + + ttm_eu_del_from_lru_locked(list); + spin_unlock(&glob->lru_lock); + ttm_eu_list_ref_sub(list); + return 0; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); @@ -97,21 +203,36 @@ EXPORT_SYMBOL(ttm_eu_reserve_buffers); void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj) { struct ttm_validate_buffer *entry; + struct ttm_buffer_object *bo; + struct ttm_bo_global *glob; + struct ttm_bo_device *bdev; + struct ttm_bo_driver *driver; - list_for_each_entry(entry, list, head) { - struct ttm_buffer_object *bo = entry->bo; - struct ttm_bo_driver *driver = bo->bdev->driver; - void *old_sync_obj; + if (list_empty(list)) + return; + + bo = list_first_entry(list, struct ttm_validate_buffer, head)->bo; + bdev = bo->bdev; + driver = bdev->driver; + glob = bo->glob; - spin_lock(&bo->lock); - old_sync_obj = bo->sync_obj; + spin_lock(&bdev->fence_lock); + spin_lock(&glob->lru_lock); + + list_for_each_entry(entry, list, head) { + bo = entry->bo; + entry->old_sync_obj = bo->sync_obj; bo->sync_obj = driver->sync_obj_ref(sync_obj); bo->sync_obj_arg = entry->new_sync_obj_arg; - spin_unlock(&bo->lock); - ttm_bo_unreserve(bo); + ttm_bo_unreserve_locked(bo); entry->reserved = false; - if (old_sync_obj) - driver->sync_obj_unref(&old_sync_obj); + } + spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->fence_lock); + + list_for_each_entry(entry, list, head) { + if (entry->old_sync_obj) + driver->sync_obj_unref(&entry->old_sync_obj); } } EXPORT_SYMBOL(ttm_eu_fence_buffer_objects); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index e7a58d055041..10fc01f69c40 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -264,7 +264,6 @@ struct vmw_private { */ struct vmw_sw_context ctx; - uint32_t val_seq; struct mutex cmdbuf_mutex; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 76954e3528c1..41b95ed6dbcd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -653,8 +653,7 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size); if (unlikely(ret != 0)) goto out_err; - ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes, - dev_priv->val_seq++); + ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes); if (unlikely(ret != 0)) goto out_err; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index beafc156a535..50852aad260a 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -74,6 +74,8 @@ struct ttm_placement { * @is_iomem: is this io memory ? * @size: size in byte * @offset: offset from the base address + * @io_reserved_vm: The VM system has a refcount in @io_reserved_count + * @io_reserved_count: Refcounting the numbers of callers to ttm_mem_io_reserve * * Structure indicating the bus placement of an object. */ @@ -83,7 +85,8 @@ struct ttm_bus_placement { unsigned long size; unsigned long offset; bool is_iomem; - bool io_reserved; + bool io_reserved_vm; + uint64_t io_reserved_count; }; @@ -154,7 +157,6 @@ struct ttm_tt; * keeps one refcount. When this refcount reaches zero, * the object is destroyed. * @event_queue: Queue for processes waiting on buffer object status change. - * @lock: spinlock protecting mostly synchronization members. * @mem: structure describing current placement. * @persistant_swap_storage: Usually the swap storage is deleted for buffers * pinned in physical memory. If this behaviour is not desired, this member @@ -213,7 +215,6 @@ struct ttm_buffer_object { struct kref kref; struct kref list_kref; wait_queue_head_t event_queue; - spinlock_t lock; /** * Members protected by the bo::reserved lock. @@ -237,6 +238,7 @@ struct ttm_buffer_object { struct list_head lru; struct list_head ddestroy; struct list_head swap; + struct list_head io_reserve_lru; uint32_t val_seq; bool seq_valid; @@ -248,10 +250,10 @@ struct ttm_buffer_object { atomic_t reserved; /** - * Members protected by the bo::lock + * Members protected by struct buffer_object_device::fence_lock * In addition, setting sync_obj to anything else * than NULL requires bo::reserved to be held. This allows for - * checking NULL while reserved but not holding bo::lock. + * checking NULL while reserved but not holding the mentioned lock. */ void *sync_obj_arg; @@ -364,6 +366,44 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo, */ extern void ttm_bo_unref(struct ttm_buffer_object **bo); + +/** + * ttm_bo_list_ref_sub + * + * @bo: The buffer object. + * @count: The number of references with which to decrease @bo::list_kref; + * @never_free: The refcount should not reach zero with this operation. + * + * Release @count lru list references to this buffer object. + */ +extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count, + bool never_free); + +/** + * ttm_bo_add_to_lru + * + * @bo: The buffer object. + * + * Add this bo to the relevant mem type lru and, if it's backed by + * system pages (ttms) to the swap list. + * This function must be called with struct ttm_bo_global::lru_lock held, and + * is typically called immediately prior to unreserving a bo. + */ +extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo); + +/** + * ttm_bo_del_from_lru + * + * @bo: The buffer object. + * + * Remove this bo from all lru lists used to lookup and reserve an object. + * This function must be called with struct ttm_bo_global::lru_lock held, + * and is usually called just immediately after the bo has been reserved to + * avoid recursive reservation from lru lists. + */ +extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo); + + /** * ttm_bo_lock_delayed_workqueue * diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8e0c848326b6..1da8af6ac884 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -179,30 +179,6 @@ struct ttm_tt { #define TTM_MEMTYPE_FLAG_MAPPABLE (1 << 1) /* Memory mappable */ #define TTM_MEMTYPE_FLAG_CMA (1 << 3) /* Can't map aperture */ -/** - * struct ttm_mem_type_manager - * - * @has_type: The memory type has been initialized. - * @use_type: The memory type is enabled. - * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory - * managed by this memory type. - * @gpu_offset: If used, the GPU offset of the first managed page of - * fixed memory or the first managed location in an aperture. - * @size: Size of the managed region. - * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX, - * as defined in ttm_placement_common.h - * @default_caching: The default caching policy used for a buffer object - * placed in this memory type if the user doesn't provide one. - * @manager: The range manager used for this memory type. FIXME: If the aperture - * has a page size different from the underlying system, the granularity - * of this manager should take care of this. But the range allocating code - * in ttm_bo.c needs to be modified for this. - * @lru: The lru list for this memory type. - * - * This structure is used to identify and manage memory types for a device. - * It's set up by the ttm_bo_driver::init_mem_type method. - */ - struct ttm_mem_type_manager; struct ttm_mem_type_manager_func { @@ -287,6 +263,36 @@ struct ttm_mem_type_manager_func { void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); }; +/** + * struct ttm_mem_type_manager + * + * @has_type: The memory type has been initialized. + * @use_type: The memory type is enabled. + * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory + * managed by this memory type. + * @gpu_offset: If used, the GPU offset of the first managed page of + * fixed memory or the first managed location in an aperture. + * @size: Size of the managed region. + * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX, + * as defined in ttm_placement_common.h + * @default_caching: The default caching policy used for a buffer object + * placed in this memory type if the user doesn't provide one. + * @func: structure pointer implementing the range manager. See above + * @priv: Driver private closure for @func. + * @io_reserve_mutex: Mutex optionally protecting shared io_reserve structures + * @use_io_reserve_lru: Use an lru list to try to unreserve io_mem_regions + * reserved by the TTM vm system. + * @io_reserve_lru: Optional lru list for unreserving io mem regions. + * @io_reserve_fastpath: Only use bdev::driver::io_mem_reserve to obtain + * static information. bdev::driver::io_mem_free is never used. + * @lru: The lru list for this memory type. + * + * This structure is used to identify and manage memory types for a device. + * It's set up by the ttm_bo_driver::init_mem_type method. + */ + + + struct ttm_mem_type_manager { struct ttm_bo_device *bdev; @@ -303,6 +309,15 @@ struct ttm_mem_type_manager { uint32_t default_caching; const struct ttm_mem_type_manager_func *func; void *priv; + struct mutex io_reserve_mutex; + bool use_io_reserve_lru; + bool io_reserve_fastpath; + + /* + * Protected by @io_reserve_mutex: + */ + + struct list_head io_reserve_lru; /* * Protected by the global->lru_lock. @@ -510,9 +525,12 @@ struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. + * @fence_lock: Protects the synchronizing members on *all* bos belonging + * to this device. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. + * @val_seq: Current validation sequence. * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager. * If a GPU lockup has been detected, this is forced to 0. * @dev_mapping: A pointer to the struct address_space representing the @@ -531,6 +549,7 @@ struct ttm_bo_device { struct ttm_bo_driver *driver; rwlock_t vm_lock; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + spinlock_t fence_lock; /* * Protected by the vm lock. */ @@ -541,6 +560,7 @@ struct ttm_bo_device { * Protected by the global:lru lock. */ struct list_head ddestroy; + uint32_t val_seq; /* * Protected by load / firstopen / lastclose /unload sync. @@ -753,31 +773,6 @@ extern void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo, extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait); -/** - * ttm_bo_pci_offset - Get the PCI offset for the buffer object memory. - * - * @bo Pointer to a struct ttm_buffer_object. - * @bus_base On return the base of the PCI region - * @bus_offset On return the byte offset into the PCI region - * @bus_size On return the byte size of the buffer object or zero if - * the buffer object memory is not accessible through a PCI region. - * - * Returns: - * -EINVAL if the buffer object is currently not mappable. - * 0 otherwise. - */ - -extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem, - unsigned long *bus_base, - unsigned long *bus_offset, - unsigned long *bus_size); - -extern int ttm_mem_io_reserve(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem); -extern void ttm_mem_io_free(struct ttm_bo_device *bdev, - struct ttm_mem_reg *mem); - extern void ttm_bo_global_release(struct drm_global_reference *ref); extern int ttm_bo_global_init(struct drm_global_reference *ref); @@ -810,6 +805,22 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); /** + * ttm_bo_unmap_virtual + * + * @bo: tear down the virtual mappings for this BO + * + * The caller must take ttm_mem_io_lock before calling this function. + */ +extern void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo); + +extern int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo); +extern void ttm_mem_io_free_vm(struct ttm_buffer_object *bo); +extern int ttm_mem_io_lock(struct ttm_mem_type_manager *man, + bool interruptible); +extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man); + + +/** * ttm_bo_reserve: * * @bo: A pointer to a struct ttm_buffer_object. @@ -859,11 +870,44 @@ extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); * try again. (only if use_sequence == 1). * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by * a signal. Release all buffer reservations and return to user-space. + * -EBUSY: The function needed to sleep, but @no_wait was true + * -EDEADLK: Bo already reserved using @sequence. This error code will only + * be returned if @use_sequence is set to true. */ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence); + +/** + * ttm_bo_reserve_locked: + * + * @bo: A pointer to a struct ttm_buffer_object. + * @interruptible: Sleep interruptible if waiting. + * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY. + * @use_sequence: If @bo is already reserved, Only sleep waiting for + * it to become unreserved if @sequence < (@bo)->sequence. + * + * Must be called with struct ttm_bo_global::lru_lock held, + * and will not remove reserved buffers from the lru lists. + * The function may release the LRU spinlock if it needs to sleep. + * Otherwise identical to ttm_bo_reserve. + * + * Returns: + * -EAGAIN: The reservation may cause a deadlock. + * Release all buffer reservations, wait for @bo to become unreserved and + * try again. (only if use_sequence == 1). + * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by + * a signal. Release all buffer reservations and return to user-space. + * -EBUSY: The function needed to sleep, but @no_wait was true + * -EDEADLK: Bo already reserved using @sequence. This error code will only + * be returned if @use_sequence is set to true. + */ +extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, + bool interruptible, + bool no_wait, bool use_sequence, + uint32_t sequence); + /** * ttm_bo_unreserve * @@ -874,6 +918,16 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo, extern void ttm_bo_unreserve(struct ttm_buffer_object *bo); /** + * ttm_bo_unreserve_locked + * + * @bo: A pointer to a struct ttm_buffer_object. + * + * Unreserve a previous reservation of @bo. + * Needs to be called with struct ttm_bo_global::lru_lock held. + */ +extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo); + +/** * ttm_bo_wait_unreserved * * @bo: A pointer to a struct ttm_buffer_object. diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index cd2c475da9ea..26cc7f9ffa41 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -41,7 +41,10 @@ * @bo: refcounted buffer object pointer. * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once * adding a new sync object. - * @reservied: Indicates whether @bo has been reserved for validation. + * @reserved: Indicates whether @bo has been reserved for validation. + * @removed: Indicates whether @bo has been removed from lru lists. + * @put_count: Number of outstanding references on bo::list_kref. + * @old_sync_obj: Pointer to a sync object about to be unreferenced */ struct ttm_validate_buffer { @@ -49,6 +52,9 @@ struct ttm_validate_buffer { struct ttm_buffer_object *bo; void *new_sync_obj_arg; bool reserved; + bool removed; + int put_count; + void *old_sync_obj; }; /** @@ -66,7 +72,6 @@ extern void ttm_eu_backoff_reservation(struct list_head *list); * function ttm_eu_reserve_buffers * * @list: thread private list of ttm_validate_buffer structs. - * @val_seq: A unique sequence number. * * Tries to reserve bos pointed to by the list entries for validation. * If the function returns 0, all buffers are marked as "unfenced", @@ -88,7 +93,7 @@ extern void ttm_eu_backoff_reservation(struct list_head *list); * has failed. */ -extern int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq); +extern int ttm_eu_reserve_buffers(struct list_head *list); /** * function ttm_eu_fence_buffer_objects. diff --git a/include/linux/kref.h b/include/linux/kref.h index 6cc38fc07ab7..d4a62ab2ee5e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,5 +24,7 @@ struct kref { void kref_init(struct kref *kref); void kref_get(struct kref *kref); int kref_put(struct kref *kref, void (*release) (struct kref *kref)); +int kref_sub(struct kref *kref, unsigned int count, + void (*release) (struct kref *kref)); #endif /* _KREF_H_ */ diff --git a/lib/kref.c b/lib/kref.c index d3d227a08a4b..3efb882b11db 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); +EXPORT_SYMBOL(kref_sub); |