diff options
Diffstat (limited to 'drivers/gpu/drm/radeon')
51 files changed, 2068 insertions, 531 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 0013ad0db9ef..7d7aed5357f0 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -60,7 +60,7 @@ radeon-y := radeon_drv.o # add UMS driver radeon-$(CONFIG_DRM_RADEON_UMS)+= radeon_cp.o radeon_state.o radeon_mem.o \ - radeon_irq.o r300_cmdbuf.o r600_cp.o r600_blit.o + radeon_irq.o r300_cmdbuf.o r600_cp.o r600_blit.o drm_buffer.o # add KMS driver radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ @@ -76,11 +76,11 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o \ evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ - si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \ + si_blit_shaders.o radeon_prime.o cik.o cik_blit_shaders.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o + ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o # add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 022561e28707..d416bb2ff48d 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -869,6 +869,9 @@ static int ci_set_thermal_temperature_range(struct radeon_device *rdev, WREG32_SMC(CG_THERMAL_CTRL, tmp); #endif + rdev->pm.dpm.thermal.min_temp = low_temp; + rdev->pm.dpm.thermal.max_temp = high_temp; + return 0; } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b625646bf3e2..1f598ab3b9a7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3483,7 +3483,7 @@ static void cik_gpu_init(struct radeon_device *rdev) u32 mc_shared_chmap, mc_arb_ramcfg; u32 hdp_host_path_cntl; u32 tmp; - int i, j, k; + int i, j; switch (rdev->family) { case CHIP_BONAIRE: @@ -3544,6 +3544,7 @@ static void cik_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x130B) || (rdev->pdev->device == 0x130E) || (rdev->pdev->device == 0x1315) || + (rdev->pdev->device == 0x1318) || (rdev->pdev->device == 0x131B)) { rdev->config.cik.max_cu_per_sh = 4; rdev->config.cik.max_backends_per_se = 1; @@ -3672,12 +3673,11 @@ static void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.max_sh_per_se, rdev->config.cik.max_backends_per_se); + rdev->config.cik.active_cus = 0; for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { - for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) { - rdev->config.cik.active_cus += - hweight32(cik_get_cu_active_bitmap(rdev, i, j)); - } + rdev->config.cik.active_cus += + hweight32(cik_get_cu_active_bitmap(rdev, i, j)); } } @@ -3801,7 +3801,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); @@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } +/** + * cik_semaphore_ring_emit - emit a semaphore on the CP ring + * + * @rdev: radeon_device pointer + * @ring: radeon ring buffer object + * @semaphore: radeon semaphore object + * @emit_wait: Is this a sempahore wait? + * + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP + * from running ahead of semaphore waits. + */ bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) { + /* Prevent the PFP from running ahead of the semaphore wait */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } + return true; } @@ -4004,7 +4021,7 @@ int cik_copy_cpdma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -4103,7 +4120,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -4324,7 +4341,7 @@ static int cik_cp_gfx_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return 0; } @@ -5958,14 +5975,14 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); radeon_ring_write(ring, VMID(vm->id)); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SH_MEM_BASES >> 2); radeon_ring_write(ring, 0); @@ -5976,7 +5993,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); @@ -5987,7 +6004,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); @@ -8229,8 +8246,10 @@ restart_ih: } if (queue_hotplug) schedule_work(&rdev->hotplug_work); - if (queue_reset) - schedule_work(&rdev->reset_work); + if (queue_reset) { + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index bcf480510ac2..192278bc993c 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -596,7 +596,7 @@ int cik_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -638,7 +638,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr)); radeon_ring_write(ring, 1); /* number of DWs to follow */ radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); @@ -695,7 +695,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/drm_buffer.c b/drivers/gpu/drm/radeon/drm_buffer.c new file mode 100644 index 000000000000..f4e0f3a3d7b1 --- /dev/null +++ b/drivers/gpu/drm/radeon/drm_buffer.c @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2010 Pauli Nieminen. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Multipart buffer for coping data which is larger than the page size. + * + * Authors: + * Pauli Nieminen <suokkos-at-gmail-dot-com> + */ + +#include <linux/export.h> +#include "drm_buffer.h" + +/** + * Allocate the drm buffer object. + * + * buf: Pointer to a pointer where the object is stored. + * size: The number of bytes to allocate. + */ +int drm_buffer_alloc(struct drm_buffer **buf, int size) +{ + int nr_pages = size / PAGE_SIZE + 1; + int idx; + + /* Allocating pointer table to end of structure makes drm_buffer + * variable sized */ + *buf = kzalloc(sizeof(struct drm_buffer) + nr_pages*sizeof(char *), + GFP_KERNEL); + + if (*buf == NULL) { + DRM_ERROR("Failed to allocate drm buffer object to hold" + " %d bytes in %d pages.\n", + size, nr_pages); + return -ENOMEM; + } + + (*buf)->size = size; + + for (idx = 0; idx < nr_pages; ++idx) { + + (*buf)->data[idx] = + kmalloc(min(PAGE_SIZE, size - idx * PAGE_SIZE), + GFP_KERNEL); + + + if ((*buf)->data[idx] == NULL) { + DRM_ERROR("Failed to allocate %dth page for drm" + " buffer with %d bytes and %d pages.\n", + idx + 1, size, nr_pages); + goto error_out; + } + + } + + return 0; + +error_out: + + for (; idx >= 0; --idx) + kfree((*buf)->data[idx]); + + kfree(*buf); + return -ENOMEM; +} + +/** + * Copy the user data to the begin of the buffer and reset the processing + * iterator. + * + * user_data: A pointer the data that is copied to the buffer. + * size: The Number of bytes to copy. + */ +int drm_buffer_copy_from_user(struct drm_buffer *buf, + void __user *user_data, int size) +{ + int nr_pages = size / PAGE_SIZE + 1; + int idx; + + if (size > buf->size) { + DRM_ERROR("Requesting to copy %d bytes to a drm buffer with" + " %d bytes space\n", + size, buf->size); + return -EFAULT; + } + + for (idx = 0; idx < nr_pages; ++idx) { + + if (copy_from_user(buf->data[idx], + user_data + idx * PAGE_SIZE, + min(PAGE_SIZE, size - idx * PAGE_SIZE))) { + DRM_ERROR("Failed to copy user data (%p) to drm buffer" + " (%p) %dth page.\n", + user_data, buf, idx); + return -EFAULT; + + } + } + buf->iterator = 0; + return 0; +} + +/** + * Free the drm buffer object + */ +void drm_buffer_free(struct drm_buffer *buf) +{ + + if (buf != NULL) { + + int nr_pages = buf->size / PAGE_SIZE + 1; + int idx; + for (idx = 0; idx < nr_pages; ++idx) + kfree(buf->data[idx]); + + kfree(buf); + } +} + +/** + * Read an object from buffer that may be split to multiple parts. If object + * is not split function just returns the pointer to object in buffer. But in + * case of split object data is copied to given stack object that is suplied + * by caller. + * + * The processing location of the buffer is also advanced to the next byte + * after the object. + * + * objsize: The size of the objet in bytes. + * stack_obj: A pointer to a memory location where object can be copied. + */ +void *drm_buffer_read_object(struct drm_buffer *buf, + int objsize, void *stack_obj) +{ + int idx = drm_buffer_index(buf); + int page = drm_buffer_page(buf); + void *obj = NULL; + + if (idx + objsize <= PAGE_SIZE) { + obj = &buf->data[page][idx]; + } else { + /* The object is split which forces copy to temporary object.*/ + int beginsz = PAGE_SIZE - idx; + memcpy(stack_obj, &buf->data[page][idx], beginsz); + + memcpy(stack_obj + beginsz, &buf->data[page + 1][0], + objsize - beginsz); + + obj = stack_obj; + } + + drm_buffer_advance(buf, objsize); + return obj; +} diff --git a/drivers/gpu/drm/radeon/drm_buffer.h b/drivers/gpu/drm/radeon/drm_buffer.h new file mode 100644 index 000000000000..c80d3a340b94 --- /dev/null +++ b/drivers/gpu/drm/radeon/drm_buffer.h @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 Pauli Nieminen. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Multipart buffer for coping data which is larger than the page size. + * + * Authors: + * Pauli Nieminen <suokkos-at-gmail-dot-com> + */ + +#ifndef _DRM_BUFFER_H_ +#define _DRM_BUFFER_H_ + +#include <drm/drmP.h> + +struct drm_buffer { + int iterator; + int size; + char *data[]; +}; + + +/** + * Return the index of page that buffer is currently pointing at. + */ +static inline int drm_buffer_page(struct drm_buffer *buf) +{ + return buf->iterator / PAGE_SIZE; +} +/** + * Return the index of the current byte in the page + */ +static inline int drm_buffer_index(struct drm_buffer *buf) +{ + return buf->iterator & (PAGE_SIZE - 1); +} +/** + * Return number of bytes that is left to process + */ +static inline int drm_buffer_unprocessed(struct drm_buffer *buf) +{ + return buf->size - buf->iterator; +} + +/** + * Advance the buffer iterator number of bytes that is given. + */ +static inline void drm_buffer_advance(struct drm_buffer *buf, int bytes) +{ + buf->iterator += bytes; +} + +/** + * Allocate the drm buffer object. + * + * buf: A pointer to a pointer where the object is stored. + * size: The number of bytes to allocate. + */ +extern int drm_buffer_alloc(struct drm_buffer **buf, int size); + +/** + * Copy the user data to the begin of the buffer and reset the processing + * iterator. + * + * user_data: A pointer the data that is copied to the buffer. + * size: The Number of bytes to copy. + */ +extern int drm_buffer_copy_from_user(struct drm_buffer *buf, + void __user *user_data, int size); + +/** + * Free the drm buffer object + */ +extern void drm_buffer_free(struct drm_buffer *buf); + +/** + * Read an object from buffer that may be split to multiple parts. If object + * is not split function just returns the pointer to object in buffer. But in + * case of split object data is copied to given stack object that is suplied + * by caller. + * + * The processing location of the buffer is also advanced to the next byte + * after the object. + * + * objsize: The size of the objet in bytes. + * stack_obj: A pointer to a memory location where object can be copied. + */ +extern void *drm_buffer_read_object(struct drm_buffer *buf, + int objsize, void *stack_obj); + +/** + * Returns the pointer to the dword which is offset number of elements from the + * current processing location. + * + * Caller must make sure that dword is not split in the buffer. This + * requirement is easily met if all the sizes of objects in buffer are + * multiples of dword and PAGE_SIZE is multiple dword. + * + * Call to this function doesn't change the processing location. + * + * offset: The index of the dword relative to the internat iterator. + */ +static inline void *drm_buffer_pointer_to_dword(struct drm_buffer *buffer, + int offset) +{ + int iter = buffer->iterator + offset * 4; + return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)]; +} +/** + * Returns the pointer to the dword which is offset number of elements from + * the current processing location. + * + * Call to this function doesn't change the processing location. + * + * offset: The index of the byte relative to the internat iterator. + */ +static inline void *drm_buffer_pointer_to_byte(struct drm_buffer *buffer, + int offset) +{ + int iter = buffer->iterator + offset; + return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)]; +} + +#endif diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 4fedd14e670a..dbca60c7d097 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2869,7 +2869,7 @@ static int evergreen_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cp_me = 0xff; WREG32(CP_ME_CNTL, cp_me); @@ -2912,7 +2912,7 @@ static int evergreen_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 478caefe0fef..afaba388c36d 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -155,7 +155,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9ef8c38f2d66..8b58e11b64fa 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1438,14 +1438,14 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) return kv_enable_uvd_dpm(rdev, !gate); } -static u8 kv_get_vce_boot_level(struct radeon_device *rdev) +static u8 kv_get_vce_boot_level(struct radeon_device *rdev, u32 evclk) { u8 i; struct radeon_vce_clock_voltage_dependency_table *table = &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; for (i = 0; i < table->count; i++) { - if (table->entries[i].evclk >= 0) /* XXX */ + if (table->entries[i].evclk >= evclk) break; } @@ -1468,7 +1468,7 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else - pi->vce_boot_level = kv_get_vce_boot_level(rdev); + pi->vce_boot_level = kv_get_vce_boot_level(rdev, radeon_new_state->evclk); ret = kv_copy_bytes_to_smc(rdev, pi->dpm_table_start + @@ -2726,7 +2726,10 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_sclk_ds = true; pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; - pi->bapm_enable = true; + if (radeon_bapm == 0) + pi->bapm_enable = false; + else + pi->bapm_enable = true; pi->voltage_drop_t = 0; pi->caps_sclk_throttle_low_notification = false; pi->caps_fps = false; /* true? */ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 327b85f7fd0d..ba89375f197f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1505,7 +1505,7 @@ static int cayman_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cayman_cp_enable(rdev, true); @@ -1547,7 +1547,7 @@ static int cayman_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); /* XXX init other rings */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 04b5940b8923..4c5ec44ff328 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -925,7 +925,7 @@ int r100_copy_blit(struct radeon_device *rdev, if (fence) { r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return r; } @@ -958,7 +958,7 @@ void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) RADEON_ISYNC_ANY3D_IDLE2D | RADEON_ISYNC_WAIT_IDLEGUI | RADEON_ISYNC_CPSCRATCH_IDLEGUI); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } @@ -3638,7 +3638,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) } radeon_ring_write(ring, PACKET0(scratch, 0)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { @@ -3700,7 +3700,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[6] = PACKET2(0); ib.ptr[7] = PACKET2(0); ib.length_dw = 8; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 58f0473aa73f..67780374a652 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -121,7 +121,7 @@ int r200_copy_dma(struct radeon_device *rdev, if (fence) { r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return r; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 75b30338c226..1bc4704034ce 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -295,7 +295,7 @@ void r300_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, R300_GEOMETRY_ROUND_NEAREST | R300_COLOR_ROUND_NEAREST); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } static void r300_errata(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index 84b1d5367a11..9418e388b045 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -34,10 +34,10 @@ */ #include <drm/drmP.h> -#include <drm/drm_buffer.h> #include <drm/radeon_drm.h> #include "radeon_drv.h" #include "r300_reg.h" +#include "drm_buffer.h" #include <asm/unaligned.h> diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 802b19220a21..2828605aef3f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -219,7 +219,7 @@ static void r420_cp_errata_init(struct radeon_device *rdev) radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1)); radeon_ring_write(ring, rdev->config.r300.resync_scratch); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } static void r420_cp_errata_fini(struct radeon_device *rdev) @@ -232,7 +232,7 @@ static void r420_cp_errata_fini(struct radeon_device *rdev) radeon_ring_lock(rdev, ring, 8); radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); radeon_ring_write(ring, R300_RB3D_DC_FINISH); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_scratch_free(rdev, rdev->config.r300.resync_scratch); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c70a504d96af..a95ced569d84 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev) int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) { + unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0; + int r; + + /* bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~( + UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK)); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL, + ~UPLL_BYPASS_CNTL); + + if (!vclk || !dclk) { + /* keep the Bypass mode, put PLL to sleep */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + return 0; + } + + if (rdev->clock.spll.reference_freq == 10000) + ref_div = 34; + else + ref_div = 4; + + r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000, + ref_div + 1, 0xFFF, 2, 30, ~0, + &fb_div, &vclk_div, &dclk_div); + if (r) + return r; + + if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780) + fb_div >>= 1; + else + fb_div |= 1; + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* assert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* For RS780 we have to choose ref clk */ + if (rdev->family >= CHIP_RS780) + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK, + ~UPLL_REFCLK_SRC_SEL_MASK); + + /* set the required fb, ref and post divder values */ + WREG32_P(CG_UPLL_FUNC_CNTL, + UPLL_FB_DIV(fb_div) | + UPLL_REF_DIV(ref_div), + ~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK)); + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_SW_HILEN(vclk_div >> 1) | + UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) | + UPLL_SW_HILEN2(dclk_div >> 1) | + UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) | + UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK, + ~UPLL_SW_MASK); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(15); + + /* deassert BYPASS EN */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL); + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + return 0; } @@ -992,6 +1080,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); @@ -1042,6 +1132,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); radeon_gart_table_vram_unpin(rdev); } @@ -2547,7 +2639,7 @@ int r600_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cp_me = 0xff; WREG32(R_0086D8_CP_ME_CNTL, cp_me); @@ -2683,7 +2775,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) @@ -2753,6 +2845,17 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } +/** + * r600_semaphore_ring_emit - emit a semaphore on the CP ring + * + * @rdev: radeon_device pointer + * @ring: radeon ring buffer object + * @semaphore: radeon semaphore object + * @emit_wait: Is this a sempahore wait? + * + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP + * from running ahead of semaphore waits. + */ bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -2768,6 +2871,13 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); + /* PFP_SYNC_ME packet only exists on 7xx+ */ + if (emit_wait && (rdev->family >= CHIP_RV770)) { + /* Prevent the PFP from running ahead of the semaphore wait */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } + return true; } @@ -2845,7 +2955,7 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -2899,6 +3009,18 @@ static int r600_startup(struct radeon_device *rdev) return r; } + if (rdev->has_uvd) { + r = uvd_v1_0_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + } + } + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -2927,6 +3049,18 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; + if (rdev->has_uvd) { + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -2986,6 +3120,10 @@ int r600_suspend(struct radeon_device *rdev) radeon_pm_suspend(rdev); r600_audio_fini(rdev); r600_cp_stop(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); + } r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -3065,6 +3203,14 @@ int r600_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + if (rdev->has_uvd) { + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + } + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3094,6 +3240,10 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_cp_fini(rdev); r600_irq_fini(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); + } radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); @@ -3165,7 +3315,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 8c9b7e26533c..639d6681ef5b 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -2052,7 +2052,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, dev_priv->buffers_offset = init->buffers_offset; dev_priv->gart_textures_offset = init->gart_textures_offset; - master_priv->sarea = drm_getsarea(dev); + master_priv->sarea = drm_legacy_getsarea(dev); if (!master_priv->sarea) { DRM_ERROR("could not find sarea!\n"); r600_do_cleanup_cp(dev); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 4969cef44a19..51fd98553eaf 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -261,7 +261,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); @@ -368,7 +368,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); @@ -493,7 +493,7 @@ int r600_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index f94e7a9afe75..671b48032a3d 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -330,11 +330,12 @@ #define HDP_TILING_CONFIG 0x2F3C #define HDP_DEBUG1 0x2F34 +#define MC_CONFIG 0x2000 #define MC_VM_AGP_TOP 0x2184 #define MC_VM_AGP_BOT 0x2188 #define MC_VM_AGP_BASE 0x218C #define MC_VM_FB_LOCATION 0x2180 -#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL 0x2124 #define ENABLE_L1_TLB (1 << 0) #define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) #define ENABLE_L1_STRICT_ORDERING (1 << 2) @@ -354,12 +355,14 @@ #define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) #define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 #define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C #define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 #define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC #define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 #define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 #define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C #define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL 0x212c #define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 #define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 #define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 @@ -373,6 +376,8 @@ #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 +#define RS_DQ_RD_RET_CONF 0x2348 + #define PA_CL_ENHANCE 0x8A14 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) @@ -1483,6 +1488,7 @@ #define UVD_CGC_GATE 0xf4a8 #define UVD_LMI_CTRL2 0xf4f4 #define UVD_MASTINT_EN 0xf500 +#define UVD_FW_START 0xf51C #define UVD_LMI_ADDR_EXT 0xf594 #define UVD_LMI_CTRL 0xf598 #define UVD_LMI_SWAP_CNTL 0xf5b4 @@ -1495,6 +1501,13 @@ #define UVD_MPC_SET_MUX 0xf5f4 #define UVD_MPC_SET_ALU 0xf5f8 +#define UVD_VCPU_CACHE_OFFSET0 0xf608 +#define UVD_VCPU_CACHE_SIZE0 0xf60c +#define UVD_VCPU_CACHE_OFFSET1 0xf610 +#define UVD_VCPU_CACHE_SIZE1 0xf614 +#define UVD_VCPU_CACHE_OFFSET2 0xf618 +#define UVD_VCPU_CACHE_SIZE2 0xf61c + #define UVD_VCPU_CNTL 0xf660 #define UVD_SOFT_RESET 0xf680 #define RBC_SOFT_RESET (1<<0) @@ -1524,9 +1537,35 @@ #define UVD_CONTEXT_ID 0xf6f4 +/* rs780 only */ +#define GFX_MACRO_BYPASS_CNTL 0x30c0 +#define SPLL_BYPASS_CNTL (1 << 0) +#define UPLL_BYPASS_CNTL (1 << 1) + +#define CG_UPLL_FUNC_CNTL 0x7e0 +# define UPLL_RESET_MASK 0x00000001 +# define UPLL_SLEEP_MASK 0x00000002 +# define UPLL_BYPASS_EN_MASK 0x00000004 # define UPLL_CTLREQ_MASK 0x00000008 +# define UPLL_FB_DIV(x) ((x) << 4) +# define UPLL_FB_DIV_MASK 0x0000FFF0 +# define UPLL_REF_DIV(x) ((x) << 16) +# define UPLL_REF_DIV_MASK 0x003F0000 +# define UPLL_REFCLK_SRC_SEL_MASK 0x20000000 # define UPLL_CTLACK_MASK 0x40000000 # define UPLL_CTLACK2_MASK 0x80000000 +#define CG_UPLL_FUNC_CNTL_2 0x7e4 +# define UPLL_SW_HILEN(x) ((x) << 0) +# define UPLL_SW_LOLEN(x) ((x) << 4) +# define UPLL_SW_HILEN2(x) ((x) << 8) +# define UPLL_SW_LOLEN2(x) ((x) << 12) +# define UPLL_DIVEN_MASK 0x00010000 +# define UPLL_DIVEN2_MASK 0x00020000 +# define UPLL_SW_MASK 0x0003FFFF +# define VCLK_SRC_SEL(x) ((x) << 20) +# define VCLK_SRC_SEL_MASK 0x01F00000 +# define DCLK_SRC_SEL(x) ((x) << 25) +# define DCLK_SRC_SEL_MASK 0x3E000000 /* * PM4 @@ -1597,6 +1636,7 @@ */ # define PACKET3_CP_DMA_CMD_SAIC (1 << 28) # define PACKET3_CP_DMA_CMD_DAIC (1 << 29) +#define PACKET3_PFP_SYNC_ME 0x42 /* r7xx+ only */ #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_FULL_CACHE_ENA (1 << 20) /* r7xx+ only */ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 9e1732eb402c..79c988db79ad 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -65,6 +65,8 @@ #include <linux/list.h> #include <linux/kref.h> #include <linux/interval_tree.h> +#include <linux/hashtable.h> +#include <linux/fence.h> #include <ttm/ttm_bo_api.h> #include <ttm/ttm_bo_driver.h> @@ -105,6 +107,7 @@ extern int radeon_vm_size; extern int radeon_vm_block_size; extern int radeon_deep_color; extern int radeon_use_pflipirq; +extern int radeon_bapm; /* * Copy from radeon_drv.h so we don't have to include both and have conflicting @@ -118,9 +121,6 @@ extern int radeon_use_pflipirq; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 -/* fence seq are set to this number when signaled */ -#define RADEON_FENCE_SIGNALED_SEQ 0LL - /* internal ring indices */ /* r1xx+ has gfx CP ring */ #define RADEON_RING_TYPE_GFX_INDEX 0 @@ -348,28 +348,32 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw, * Fences. */ struct radeon_fence_driver { + struct radeon_device *rdev; uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; - bool initialized; + bool initialized, delayed_irq; + struct delayed_work lockup_work; }; struct radeon_fence { + struct fence base; + struct radeon_device *rdev; - struct kref kref; - /* protected by radeon_fence.lock */ uint64_t seq; /* RB, DMA, etc. */ unsigned ring; + + wait_queue_t fence_wake; }; int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -void radeon_fence_driver_force_completion(struct radeon_device *rdev); +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); @@ -467,7 +471,7 @@ struct radeon_bo { struct list_head list; /* Protected by tbo.reserved */ u32 initial_domain; - u32 placements[3]; + struct ttm_place placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; @@ -487,6 +491,9 @@ struct radeon_bo { struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; + + struct radeon_mn *mn; + struct interval_tree_node mn_it; }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) @@ -778,6 +785,7 @@ struct radeon_irq { int radeon_irq_kms_init(struct radeon_device *rdev); void radeon_irq_kms_fini(struct radeon_device *rdev); void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); @@ -967,7 +975,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib); + struct radeon_ib *const_ib, bool hdp_flush); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); int radeon_ib_ring_tests(struct radeon_device *rdev); @@ -977,8 +985,10 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); -void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp); -void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp); +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp, + bool hdp_flush); +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp, + bool hdp_flush); void radeon_ring_undo(struct radeon_ring *ring); void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); @@ -1636,7 +1646,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains); void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp); int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); @@ -1725,6 +1736,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *cpB); void radeon_test_syncing(struct radeon_device *rdev); +/* + * MMU Notifier + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr); +void radeon_mn_unregister(struct radeon_bo *bo); /* * Debugfs @@ -2138,6 +2154,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -2294,6 +2312,7 @@ struct radeon_device { struct radeon_mman mman; struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; wait_queue_head_t fence_queue; + unsigned fence_context; struct mutex ring_lock; struct radeon_ring ring[RADEON_NUM_RINGS]; bool ib_pool_ready; @@ -2312,7 +2331,7 @@ struct radeon_device { bool need_dma32; bool accel_working; bool fastfb_working; /* IGP feature*/ - bool needs_reset; + bool needs_reset, in_reset; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; const struct firmware *me_fw; /* all family ME firmware */ const struct firmware *pfp_fw; /* r6/700 PFP firmware */ @@ -2333,7 +2352,6 @@ struct radeon_device { struct radeon_mec mec; struct work_struct hotplug_work; struct work_struct audio_work; - struct work_struct reset_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ bool has_uvd; @@ -2370,6 +2388,9 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + + struct mutex mn_lock; + DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); @@ -2425,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); /* * Cast helper */ -#define to_radeon_fence(p) ((struct radeon_fence *)(p)) +extern const struct fence_ops radeon_fence_ops; + +static inline struct radeon_fence *to_radeon_fence(struct fence *f) +{ + struct radeon_fence *__f = container_of(f, struct radeon_fence, base); + + if (__f->base.ops == &radeon_fence_ops) + return __f; + + return NULL; +} /* * Registers read & write functions. @@ -2745,18 +2776,25 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ -#if DRM_DEBUG_CODE == 0 + +/** + * radeon_ring_write - write a value to the ring + * + * @ring: radeon_ring structure holding ring information + * @v: dword (dw) value to write + * + * Write a value to the requested ring buffer (all asics). + */ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { + if (ring->count_dw <= 0) + DRM_ERROR("radeon: writing more dwords to the ring than expected!\n"); + ring->ring[ring->wptr++] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; ring->ring_free_dw--; } -#else -/* With debugging this is just too big to inline */ -void radeon_ring_write(struct radeon_ring *ring, uint32_t v); -#endif /* * ASICs macro. @@ -2871,6 +2909,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); +extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags); +extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm); +extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm); extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base); extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe09758..d91f965e8219 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = { }, }; +static struct radeon_asic_ring rv6xx_uvd_ring = { + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v1_0_fence_emit, + .emit_semaphore = &uvd_v1_0_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, +}; + static struct radeon_asic rv6xx_asic = { .init = &r600_init, .fini = &r600_fini, @@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, @@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, @@ -2298,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RS780: case CHIP_RS880: rdev->asic = &rs780_asic; - rdev->has_uvd = true; + /* 760G/780V/880V don't have UVD */ + if ((rdev->pdev->device == 0x9616)|| + (rdev->pdev->device == 0x9611)|| + (rdev->pdev->device == 0x9613)|| + (rdev->pdev->device == 0x9711)|| + (rdev->pdev->device == 0x9713)) + rdev->has_uvd = false; + else + rdev->has_uvd = true; break; case CHIP_RV770: case CHIP_RV730: diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc01780..987a3b713e06 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -883,6 +883,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void uvd_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int uvd_v1_0_resume(struct radeon_device *rdev); int uvd_v1_0_init(struct radeon_device *rdev); void uvd_v1_0_fini(struct radeon_device *rdev); @@ -890,6 +891,8 @@ int uvd_v1_0_start(struct radeon_device *rdev); void uvd_v1_0_stop(struct radeon_device *rdev); int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index bb0d5c3a8311..0c388016eecb 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -1298,7 +1298,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, dev_priv->buffers_offset = init->buffers_offset; dev_priv->gart_textures_offset = init->gart_textures_offset; - master_priv->sarea = drm_getsarea(dev); + master_priv->sarea = drm_legacy_getsarea(dev); if (!master_priv->sarea) { DRM_ERROR("could not find sarea!\n"); radeon_do_cleanup_cp(dev); @@ -2106,9 +2106,9 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags) else dev_priv->flags |= RADEON_IS_PCI; - ret = drm_addmap(dev, pci_resource_start(dev->pdev, 2), - pci_resource_len(dev->pdev, 2), _DRM_REGISTERS, - _DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio); + ret = drm_legacy_addmap(dev, pci_resource_start(dev->pdev, 2), + pci_resource_len(dev->pdev, 2), _DRM_REGISTERS, + _DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio); if (ret != 0) return ret; @@ -2135,8 +2135,8 @@ int radeon_master_create(struct drm_device *dev, struct drm_master *master) /* prebuild the SAREA */ sareapage = max_t(unsigned long, SAREA_MAX, PAGE_SIZE); - ret = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK, - &master_priv->sarea); + ret = drm_legacy_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK, + &master_priv->sarea); if (ret) { DRM_ERROR("SAREA setup failed\n"); kfree(master_priv); @@ -2162,7 +2162,7 @@ void radeon_master_destroy(struct drm_device *dev, struct drm_master *master) master_priv->sarea_priv = NULL; if (master_priv->sarea) - drm_rmmap_locked(dev, master_priv->sarea); + drm_legacy_rmmap_locked(dev, master_priv->sarea); kfree(master_priv); @@ -2181,9 +2181,9 @@ int radeon_driver_firstopen(struct drm_device *dev) dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE; dev_priv->fb_aper_offset = pci_resource_start(dev->pdev, 0); - ret = drm_addmap(dev, dev_priv->fb_aper_offset, - pci_resource_len(dev->pdev, 0), _DRM_FRAME_BUFFER, - _DRM_WRITE_COMBINING, &map); + ret = drm_legacy_addmap(dev, dev_priv->fb_aper_offset, + pci_resource_len(dev->pdev, 0), + _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING, &map); if (ret != 0) return ret; @@ -2196,7 +2196,7 @@ int radeon_driver_unload(struct drm_device *dev) DRM_DEBUG("\n"); - drm_rmmap(dev, dev_priv->mmio); + drm_legacy_rmmap(dev, dev_priv->mmio); kfree(dev_priv); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index ee712c199b25..6e3d1c8f3483 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -78,7 +78,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) struct radeon_cs_chunk *chunk; struct radeon_cs_buckets buckets; unsigned i, j; - bool duplicate; + bool duplicate, need_mmap_lock = false; + int r; if (p->chunk_relocs_idx == -1) { return 0; @@ -132,13 +133,17 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) * the buffers used for read only, which doubles the range * to 0 to 31. 32 is reserved for the kernel driver. */ - priority = (r->flags & 0xf) * 2 + !!r->write_domain; + priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2 + + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in - VRAM, also but everything into VRAM on AGP cards to avoid - image corruptions */ + VRAM, also but everything into VRAM on AGP cards and older + IGP chips to avoid image corruptions */ if (p->ring == R600_RING_TYPE_UVD_INDEX && - (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { + (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) || + p->rdev->family == CHIP_RS780 || + p->rdev->family == CHIP_RS880)) { + /* TODO: is this still needed for NI+ ? */ p->relocs[i].prefered_domains = RADEON_GEM_DOMAIN_VRAM; @@ -164,6 +169,19 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].allowed_domains = domain; } + if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) { + uint32_t domain = p->relocs[i].prefered_domains; + if (!(domain & RADEON_GEM_DOMAIN_GTT)) { + DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is " + "allowed for userptr BOs\n"); + return -EINVAL; + } + need_mmap_lock = true; + domain = RADEON_GEM_DOMAIN_GTT; + p->relocs[i].prefered_domains = domain; + p->relocs[i].allowed_domains = domain; + } + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; @@ -176,8 +194,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->cs_flags & RADEON_CS_USE_VM) p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, &p->validated); + if (need_mmap_lock) + down_read(¤t->mm->mmap_sem); + + r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + + if (need_mmap_lock) + up_read(¤t->mm->mmap_sem); - return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + return r; } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) @@ -228,11 +253,17 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) int i; for (i = 0; i < p->nrelocs; i++) { + struct reservation_object *resv; + struct fence *fence; + if (!p->relocs[i].robj) continue; + resv = p->relocs[i].robj->tbo.resv; + fence = reservation_object_get_excl(resv); + radeon_semaphore_sync_to(p->ib.semaphore, - p->relocs[i].robj->tbo.sync_obj); + (struct radeon_fence *)fence); } } @@ -402,7 +433,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, - parser->ib.fence); + &parser->ib.fence->base); } else if (backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); @@ -450,7 +481,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, radeon_vce_note_usage(rdev); radeon_cs_sync_rings(parser); - r = radeon_ib_schedule(rdev, &parser->ib, NULL); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); if (r) { DRM_ERROR("Failed to schedule IB !\n"); } @@ -541,9 +572,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { - r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); + r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true); } else { - r = radeon_ib_schedule(rdev, &parser->ib, NULL); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); } out: @@ -628,6 +659,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) up_read(&rdev->exclusive_lock); return -EBUSY; } + if (rdev->in_reset) { + up_read(&rdev->exclusive_lock); + r = radeon_gpu_reset(rdev); + if (!r) + r = -EAGAIN; + return r; + } /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c8ea050c8fa4..e84a76e6656a 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev, for (i = 0; i < RADEON_NUM_RINGS; i++) { rdev->ring[i].idx = i; } + rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS); DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, @@ -1270,6 +1271,8 @@ int radeon_device_init(struct radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); + mutex_init(&rdev->mn_lock); + hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; @@ -1395,10 +1398,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; - r = radeon_ib_ring_tests(rdev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - r = radeon_gem_debugfs_init(rdev); if (r) { DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -1416,6 +1415,10 @@ int radeon_device_init(struct radeon_device *rdev, return r; } + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + if ((radeon_testing & 1)) { if (rdev->accel_working) radeon_test_moves(rdev); @@ -1486,7 +1489,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) struct drm_crtc *crtc; struct drm_connector *connector; int i, r; - bool force_completion = false; if (dev == NULL || dev->dev_private == NULL) { return -ENODEV; @@ -1530,12 +1532,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) r = radeon_fence_wait_empty(rdev, i); if (r) { /* delay GPU reset to resume */ - force_completion = true; + radeon_fence_driver_force_completion(rdev, i); } } - if (force_completion) { - radeon_fence_driver_force_completion(rdev); - } radeon_save_bios_scratch_regs(rdev); @@ -1675,13 +1674,11 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } - rdev->needs_reset = false; - radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); - radeon_pm_suspend(rdev); radeon_suspend(rdev); + radeon_hpd_fini(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i], @@ -1693,7 +1690,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) } } -retry: r = radeon_asic_reset(rdev); if (!r) { dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n"); @@ -1702,40 +1698,69 @@ retry: radeon_restore_bios_scratch_regs(rdev); - if (!r) { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!r && ring_data[i]) { radeon_ring_restore(rdev, &rdev->ring[i], ring_sizes[i], ring_data[i]); - ring_sizes[i] = 0; - ring_data[i] = NULL; + } else { + radeon_fence_driver_force_completion(rdev, i); + kfree(ring_data[i]); } + } - r = radeon_ib_ring_tests(rdev); + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* do dpm late init */ + r = radeon_pm_late_init(rdev); if (r) { - dev_err(rdev->dev, "ib ring test failed (%d).\n", r); - if (saved) { - saved = false; - radeon_suspend(rdev); - goto retry; - } + rdev->pm.dpm_enabled = false; + DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); } } else { - radeon_fence_driver_force_completion(rdev); - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - kfree(ring_data[i]); + /* resume old pm late */ + radeon_pm_resume(rdev); + } + + /* init dig PHYs, disp eng pll */ + if (rdev->is_atom_bios) { + radeon_atom_encoder_init(rdev); + radeon_atom_disp_eng_pll_init(rdev); + /* turn on the BL */ + if (rdev->mode_info.bl_encoder) { + u8 bl_level = radeon_get_backlight_level(rdev, + rdev->mode_info.bl_encoder); + radeon_set_backlight_level(rdev, rdev->mode_info.bl_encoder, + bl_level); } } + /* reset hpd state */ + radeon_hpd_init(rdev); + + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + + rdev->in_reset = true; + rdev->needs_reset = false; + + downgrade_write(&rdev->exclusive_lock); - radeon_pm_resume(rdev); drm_helper_resume_force_mode(rdev->ddev); - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); - if (r) { + /* set the power state here in case we are a PX system or headless */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) + radeon_pm_compute_clocks(rdev); + + if (!r) { + r = radeon_ib_ring_tests(rdev); + if (r && saved) + r = -EAGAIN; + } else { /* bad news, how to tell it to userspace ? */ dev_info(rdev->dev, "GPU reset failed\n"); } - up_write(&rdev->exclusive_lock); + rdev->needs_reset = r == -EAGAIN; + rdev->in_reset = false; + + up_read(&rdev->exclusive_lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index fd8cd0c3600f..4eb37976f879 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work) r = radeon_fence_wait(work->fence, false); if (r == -EDEADLK) { up_read(&rdev->exclusive_lock); - r = radeon_gpu_reset(rdev); + do { + r = radeon_gpu_reset(rdev); + } while (r == -EAGAIN); down_read(&rdev->exclusive_lock); } if (r) @@ -474,11 +476,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, obj = new_radeon_fb->obj; new_rbo = gem_to_radeon_bo(obj); - spin_lock(&new_rbo->tbo.bdev->fence_lock); - if (new_rbo->tbo.sync_obj) - work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); - spin_unlock(&new_rbo->tbo.bdev->fence_lock); - /* pin the new buffer */ DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n", work->old_rbo, new_rbo); @@ -497,6 +494,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } + work->fence = (struct radeon_fence *)fence_get(reservation_object_get_excl(new_rbo->tbo.resv)); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo); @@ -580,7 +578,6 @@ cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); radeon_fence_unref(&work->fence); kfree(work); - return r; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830c6c40..ec7e963d9bf7 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -114,6 +114,9 @@ int radeon_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv); void radeon_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int flags, int *vpos, int *hpos, ktime_t *stime, @@ -134,6 +137,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sg); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); +struct reservation_object *radeon_gem_prime_res_obj(struct drm_gem_object *); void *radeon_gem_prime_vmap(struct drm_gem_object *obj); void radeon_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); extern long radeon_kms_compat_ioctl(struct file *filp, unsigned int cmd, @@ -179,6 +183,7 @@ int radeon_vm_size = 8; int radeon_vm_block_size = -1; int radeon_deep_color = 0; int radeon_use_pflipirq = 2; +int radeon_bapm = -1; MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers"); module_param_named(no_wb, radeon_no_wb, int, 0444); @@ -258,6 +263,9 @@ module_param_named(deep_color, radeon_deep_color, int, 0444); MODULE_PARM_DESC(use_pflipirq, "Pflip irqs for pageflip completion (0 = disable, 1 = as fallback, 2 = exclusive (default))"); module_param_named(use_pflipirq, radeon_use_pflipirq, int, 0444); +MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(bapm, radeon_bapm, int, 0444); + static struct pci_device_id pciidlist[] = { radeon_PCI_IDS }; @@ -320,6 +328,7 @@ static struct drm_driver driver_old = { .preclose = radeon_driver_preclose, .postclose = radeon_driver_postclose, .lastclose = radeon_driver_lastclose, + .set_busid = drm_pci_set_busid, .unload = radeon_driver_unload, .suspend = radeon_suspend, .resume = radeon_resume, @@ -543,6 +552,7 @@ static struct drm_driver kms_driver = { .preclose = radeon_driver_preclose_kms, .postclose = radeon_driver_postclose_kms, .lastclose = radeon_driver_lastclose_kms, + .set_busid = drm_pci_set_busid, .unload = radeon_driver_unload_kms, .get_vblank_counter = radeon_get_vblank_counter_kms, .enable_vblank = radeon_enable_vblank_kms, @@ -568,10 +578,11 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, + .gem_prime_export = radeon_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = radeon_gem_prime_pin, .gem_prime_unpin = radeon_gem_prime_unpin, + .gem_prime_res_obj = radeon_gem_prime_res_obj, .gem_prime_get_sg_table = radeon_gem_prime_get_sg_table, .gem_prime_import_sg_table = radeon_gem_prime_import_sg_table, .gem_prime_vmap = radeon_gem_prime_vmap, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 913787085dfa..af9f2d6bd7d0 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -98,6 +98,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) } /** + * radeon_fence_schedule_check - schedule lockup check + * + * @rdev: radeon_device pointer + * @ring: ring index we should work with + * + * Queues a delayed work item to check for lockups. + */ +static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring) +{ + /* + * Do not reset the timer here with mod_delayed_work, + * this can livelock in an interaction with TTM delayed destroy. + */ + queue_delayed_work(system_power_efficient_wq, + &rdev->fence_drv[ring].lockup_work, + RADEON_FENCE_JIFFIES_TIMEOUT); +} + +/** * radeon_fence_emit - emit a fence on the requested ring * * @rdev: radeon_device pointer @@ -111,30 +130,70 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring) { + u64 seq = ++rdev->fence_drv[ring].sync_seq[ring]; + /* we are protected by the ring emission mutex */ *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; } - kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; + (*fence)->seq = seq; (*fence)->ring = ring; + fence_init(&(*fence)->base, &radeon_fence_ops, + &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + radeon_fence_schedule_check(rdev, ring); return 0; } /** - * radeon_fence_process - process a fence + * radeon_fence_check_signaled - callback from fence_queue + * + * this function is called with fence_queue lock held, which is also used + * for the fence locking itself, so unlocked variants are used for + * fence_signal, and remove_wait_queue. + */ +static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) +{ + struct radeon_fence *fence; + u64 seq; + + fence = container_of(wait, struct radeon_fence, fence_wake); + + /* + * We cannot use radeon_fence_process here because we're already + * in the waitqueue, in a call from wake_up_all. + */ + seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq); + if (seq >= fence->seq) { + int ret = fence_signal_locked(&fence->base); + + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + + radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring); + __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake); + fence_put(&fence->base); + } else + FENCE_TRACE(&fence->base, "pending\n"); + return 0; +} + +/** + * radeon_fence_activity - check for fence activity * * @rdev: radeon_device pointer * @ring: ring index the fence is associated with * - * Checks the current fence value and wakes the fence queue - * if the sequence number has increased (all asics). + * Checks the current fence value and calculates the last + * signalled fence value. Returns true if activity occured + * on the ring, and the fence_queue should be waken up. */ -void radeon_fence_process(struct radeon_device *rdev, int ring) +static bool radeon_fence_activity(struct radeon_device *rdev, int ring) { uint64_t seq, last_seq, last_emitted; unsigned count_loop = 0; @@ -190,23 +249,77 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) } } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq); - if (wake) - wake_up_all(&rdev->fence_queue); + if (seq < last_emitted) + radeon_fence_schedule_check(rdev, ring); + + return wake; } /** - * radeon_fence_destroy - destroy a fence + * radeon_fence_check_lockup - check for hardware lockup * - * @kref: fence kref + * @work: delayed work item * - * Frees the fence object (all asics). + * Checks for fence activity and if there is none probe + * the hardware if a lockup occured. */ -static void radeon_fence_destroy(struct kref *kref) +static void radeon_fence_check_lockup(struct work_struct *work) { - struct radeon_fence *fence; + struct radeon_fence_driver *fence_drv; + struct radeon_device *rdev; + int ring; + + fence_drv = container_of(work, struct radeon_fence_driver, + lockup_work.work); + rdev = fence_drv->rdev; + ring = fence_drv - &rdev->fence_drv[0]; + + if (!down_read_trylock(&rdev->exclusive_lock)) { + /* just reschedule the check if a reset is going on */ + radeon_fence_schedule_check(rdev, ring); + return; + } + + if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) { + unsigned long irqflags; + + fence_drv->delayed_irq = false; + spin_lock_irqsave(&rdev->irq.lock, irqflags); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + } + + if (radeon_fence_activity(rdev, ring)) + wake_up_all(&rdev->fence_queue); - fence = container_of(kref, struct radeon_fence, kref); - kfree(fence); + else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) { + + /* good news we believe it's a lockup */ + dev_warn(rdev->dev, "GPU lockup (current fence id " + "0x%016llx last fence id 0x%016llx on ring %d)\n", + (uint64_t)atomic64_read(&fence_drv->last_seq), + fence_drv->sync_seq[ring], ring); + + /* remember that we need an reset */ + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } + up_read(&rdev->exclusive_lock); +} + +/** + * radeon_fence_process - process a fence + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Checks the current fence value and wakes the fence queue + * if the sequence number has increased (all asics). + */ +void radeon_fence_process(struct radeon_device *rdev, int ring) +{ + if (radeon_fence_activity(rdev, ring)) + wake_up_all(&rdev->fence_queue); } /** @@ -237,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } +static bool radeon_fence_is_signaled(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + unsigned ring = fence->ring; + u64 seq = fence->seq; + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_fence_process(rdev, ring); + up_read(&rdev->exclusive_lock); + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + } + return false; +} + +/** + * radeon_fence_enable_signaling - enable signalling on fence + * @fence: fence + * + * This function is called with fence_queue lock held, and adds a callback + * to fence_queue that checks if this fence is signaled, and if so it + * signals the fence and removes itself. + */ +static bool radeon_fence_enable_signaling(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) + return false; + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_irq_kms_sw_irq_get(rdev, fence->ring); + + if (radeon_fence_activity(rdev, fence->ring)) + wake_up_all_locked(&rdev->fence_queue); + + /* did fence get signaled after we enabled the sw irq? */ + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) { + radeon_irq_kms_sw_irq_put(rdev, fence->ring); + up_read(&rdev->exclusive_lock); + return false; + } + + up_read(&rdev->exclusive_lock); + } else { + /* we're probably in a lockup, lets not fiddle too much */ + if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring)) + rdev->fence_drv[fence->ring].delayed_irq = true; + radeon_fence_schedule_check(rdev, fence->ring); + } + + fence->fence_wake.flags = 0; + fence->fence_wake.private = NULL; + fence->fence_wake.func = radeon_fence_check_signaled; + __add_wait_queue(&rdev->fence_queue, &fence->fence_wake); + fence_get(f); + + FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring); + return true; +} + /** * radeon_fence_signaled - check if a fence has signaled * @@ -247,14 +429,15 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, */ bool radeon_fence_signaled(struct radeon_fence *fence) { - if (!fence) { + if (!fence) return true; - } - if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { - return true; - } + if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { - fence->seq = RADEON_FENCE_SIGNALED_SEQ; + int ret; + + ret = fence_signal(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); return true; } return false; @@ -283,110 +466,70 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) } /** - * radeon_fence_wait_seq - wait for a specific sequence numbers + * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers * * @rdev: radeon device pointer * @target_seq: sequence number(s) we want to wait for * @intr: use interruptable sleep + * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait * * Wait for the requested sequence number(s) to be written by any ring * (all asics). Sequnce number array is indexed by ring id. * @intr selects whether to use interruptable (true) or non-interruptable * (false) sleep when waiting for the sequence number. Helper function * for radeon_fence_wait_*(). - * Returns 0 if the sequence number has passed, error for all other cases. + * Returns remaining time if the sequence number has passed, 0 when + * the wait timeout, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected. */ -static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, - bool intr) +static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, + u64 *target_seq, bool intr, + long timeout) { - uint64_t last_seq[RADEON_NUM_RINGS]; - bool signaled; - int i, r; - - while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { + long r; + int i; - /* Save current sequence values, used to check for GPU lockups */ - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; + if (radeon_fence_any_seq_signaled(rdev, target_seq)) + return timeout; - last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq); - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); - radeon_irq_kms_sw_irq_get(rdev, i); - } + /* enable IRQs and tracing */ + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; - if (intr) { - r = wait_event_interruptible_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } else { - r = wait_event_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } + trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + radeon_irq_kms_sw_irq_get(rdev, i); + } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; + if (intr) { + r = wait_event_interruptible_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), timeout); + } else { + r = wait_event_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), timeout); + } - radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); - } + if (rdev->needs_reset) + r = -EDEADLK; - if (unlikely(r < 0)) - return r; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; - if (unlikely(!signaled)) { - if (rdev->needs_reset) - return -EDEADLK; - - /* we were interrupted for some reason and fence - * isn't signaled yet, resume waiting */ - if (r) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq)) - break; - } - - if (i != RADEON_NUM_RINGS) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i])) - break; - } - - if (i < RADEON_NUM_RINGS) { - /* good news we believe it's a lockup */ - dev_warn(rdev->dev, "GPU lockup (waiting for " - "0x%016llx last fence id 0x%016llx on" - " ring %d)\n", - target_seq[i], last_seq[i], i); - - /* remember that we need an reset */ - rdev->needs_reset = true; - wake_up_all(&rdev->fence_queue); - return -EDEADLK; - } - } + radeon_irq_kms_sw_irq_put(rdev, i); + trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); } - return 0; + + return r; } /** * radeon_fence_wait - wait for a fence to signal * * @fence: radeon fence object - * @intr: use interruptable sleep + * @intr: use interruptible sleep * * Wait for the requested fence to signal (all asics). * @intr selects whether to use interruptable (true) or non-interruptable @@ -396,22 +539,17 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, int radeon_fence_wait(struct radeon_fence *fence, bool intr) { uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; - - if (fence == NULL) { - WARN(1, "Querying an invalid fence : %p !\n", fence); - return -EINVAL; - } + long r; seq[fence->ring] = fence->seq; - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) - return 0; - - r = radeon_fence_wait_seq(fence->rdev, seq, intr); - if (r) + r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; + } - fence->seq = RADEON_FENCE_SIGNALED_SEQ; + r = fence_signal(&fence->base); + if (!r) + FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); return 0; } @@ -434,7 +572,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, { uint64_t seq[RADEON_NUM_RINGS]; unsigned i, num_rings = 0; - int r; + long r; for (i = 0; i < RADEON_NUM_RINGS; ++i) { seq[i] = 0; @@ -445,18 +583,14 @@ int radeon_fence_wait_any(struct radeon_device *rdev, seq[i] = fences[i]->seq; ++num_rings; - - /* test if something was allready signaled */ - if (seq[i] == RADEON_FENCE_SIGNALED_SEQ) - return 0; } /* nothing to wait for ? */ if (num_rings == 0) return -ENOENT; - r = radeon_fence_wait_seq(rdev, seq, intr); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; } return 0; @@ -475,6 +609,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; + long r; seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) { @@ -482,7 +617,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) already the last emited fence */ return -ENOENT; } - return radeon_fence_wait_seq(rdev, seq, false); + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + return 0; } /** @@ -498,18 +636,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; + long r; seq[ring] = rdev->fence_drv[ring].sync_seq[ring]; if (!seq[ring]) return 0; - r = radeon_fence_wait_seq(rdev, seq, false); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { if (r == -EDEADLK) return -EDEADLK; - dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n", + dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n", ring, r); } return 0; @@ -525,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { - kref_get(&fence->kref); + fence_get(&fence->base); return fence; } @@ -542,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence) *fence = NULL; if (tmp) { - kref_put(&tmp->kref, radeon_fence_destroy); + fence_put(&tmp->base); } } @@ -711,6 +849,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].initialized = false; + INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work, + radeon_fence_check_lockup); + rdev->fence_drv[ring].rdev = rdev; } /** @@ -758,8 +899,9 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) r = radeon_fence_wait_empty(rdev, ring); if (r) { /* no need to trigger GPU reset as we are unloading */ - radeon_fence_driver_force_completion(rdev); + radeon_fence_driver_force_completion(rdev, ring); } + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); rdev->fence_drv[ring].initialized = false; @@ -771,18 +913,16 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) * radeon_fence_driver_force_completion - force all fence waiter to complete * * @rdev: radeon device pointer + * @ring: the ring to complete * * In case of GPU reset failure make sure no process keep waiting on fence * that will never complete. */ -void radeon_fence_driver_force_completion(struct radeon_device *rdev) +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) { - int ring; - - for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { - if (!rdev->fence_drv[ring].initialized) - continue; + if (rdev->fence_drv[ring].initialized) { radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); } } @@ -833,6 +973,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data) down_read(&rdev->exclusive_lock); seq_printf(m, "%d\n", rdev->needs_reset); rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); up_read(&rdev->exclusive_lock); return 0; @@ -852,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev) return 0; #endif } + +static const char *radeon_fence_get_driver_name(struct fence *fence) +{ + return "radeon"; +} + +static const char *radeon_fence_get_timeline_name(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + switch (fence->ring) { + case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx"; + case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1"; + case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2"; + case R600_RING_TYPE_DMA_INDEX: return "radeon.dma"; + case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1"; + case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd"; + case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1"; + case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2"; + default: WARN_ON_ONCE(1); return "radeon.unk"; + } +} + +static inline bool radeon_test_signaled(struct radeon_fence *fence) +{ + return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); +} + +static signed long radeon_fence_default_wait(struct fence *f, bool intr, + signed long t) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + bool signaled; + + fence_enable_sw_signaling(&fence->base); + + /* + * This function has to return -EDEADLK, but cannot hold + * exclusive_lock during the wait because some callers + * may already hold it. This means checking needs_reset without + * lock, and not fiddling with any gpu internals. + * + * The callback installed with fence_enable_sw_signaling will + * run before our wait_event_*timeout call, so we will see + * both the signaled fence and the changes to needs_reset. + */ + + if (intr) + t = wait_event_interruptible_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + else + t = wait_event_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + + if (t > 0 && !signaled) + return -EDEADLK; + return t; +} + +const struct fence_ops radeon_fence_ops = { + .get_driver_name = radeon_fence_get_driver_name, + .get_timeline_name = radeon_fence_get_timeline_name, + .enable_signaling = radeon_fence_enable_signaling, + .signaled = radeon_fence_is_signaled, + .wait = radeon_fence_default_wait, + .release = NULL, +}; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b0ff3f..4b7c8ec36c2f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -94,7 +94,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, { struct radeon_bo *robj; uint32_t domain; - int r; + long r; /* FIXME: reeimplement */ robj = gem_to_radeon_bo(gobj); @@ -110,9 +110,12 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, } if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ - r = radeon_bo_wait(robj, NULL, false); - if (r) { - printk(KERN_ERR "Failed to wait for object !\n"); + r = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ); + if (!r) + r = -EBUSY; + + if (r < 0 && r != -EINTR) { + printk(KERN_ERR "Failed to wait for object: %li\n", r); return r; } } @@ -272,6 +275,94 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return 0; } +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct radeon_device *rdev = dev->dev_private; + struct drm_radeon_gem_userptr *args = data; + struct drm_gem_object *gobj; + struct radeon_bo *bo; + uint32_t handle; + int r; + + if (offset_in_page(args->addr | args->size)) + return -EINVAL; + + /* reject unknown flag values */ + if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | + RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE | + RADEON_GEM_USERPTR_REGISTER)) + return -EINVAL; + + if (args->flags & RADEON_GEM_USERPTR_READONLY) { + /* readonly pages not tested on older hardware */ + if (rdev->family < CHIP_R600) + return -EINVAL; + + } else if (!(args->flags & RADEON_GEM_USERPTR_ANONONLY) || + !(args->flags & RADEON_GEM_USERPTR_REGISTER)) { + + /* if we want to write to it we must require anonymous + memory and install a MMU notifier */ + return -EACCES; + } + + down_read(&rdev->exclusive_lock); + + /* create a gem object to contain this object in */ + r = radeon_gem_object_create(rdev, args->size, 0, + RADEON_GEM_DOMAIN_CPU, 0, + false, &gobj); + if (r) + goto handle_lockup; + + bo = gem_to_radeon_bo(gobj); + r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); + if (r) + goto release_object; + + if (args->flags & RADEON_GEM_USERPTR_REGISTER) { + r = radeon_mn_register(bo, args->addr); + if (r) + goto release_object; + } + + if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { + down_read(¤t->mm->mmap_sem); + r = radeon_bo_reserve(bo, true); + if (r) { + up_read(¤t->mm->mmap_sem); + goto release_object; + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + radeon_bo_unreserve(bo); + up_read(¤t->mm->mmap_sem); + if (r) + goto release_object; + } + + r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); + if (r) + goto handle_lockup; + + args->handle = handle; + up_read(&rdev->exclusive_lock); + return 0; + +release_object: + drm_gem_object_unreference_unlocked(gobj); + +handle_lockup: + up_read(&rdev->exclusive_lock); + r = radeon_gem_handle_lockup(rdev, r); + + return r; +} + int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -315,6 +406,10 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) { + drm_gem_object_unreference_unlocked(gobj); + return -EPERM; + } *offset_p = radeon_bo_mmap_offset(robj); drm_gem_object_unreference_unlocked(gobj); return 0; @@ -357,15 +452,22 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_radeon_gem_wait_idle *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; - int r; + int r = 0; uint32_t cur_placement = 0; + long ret; gobj = drm_gem_object_lookup(dev, filp, args->handle); if (gobj == NULL) { return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, false); + + ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ); + if (ret == 0) + r = -EBUSY; + else if (ret < 0) + r = ret; + /* Flush HDP cache via MMIO if necessary */ if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) @@ -532,6 +634,11 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + + r = -EPERM; + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) + goto out; + r = radeon_bo_reserve(robj, false); if (unlikely(r)) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 65b0c213488d..6fc7461d70c4 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -107,6 +107,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * @rdev: radeon_device pointer * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) + * @hdp_flush: Whether or not to perform an HDP cache flush * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. @@ -122,7 +123,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * to SI there was just a DE IB. */ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib) + struct radeon_ib *const_ib, bool hdp_flush) { struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; @@ -176,7 +177,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (ib->vm) radeon_vm_fence(rdev, ib->vm, ib->fence); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, hdp_flush); return 0; } @@ -268,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) r = radeon_ib_test(rdev, i, ring); if (r) { + radeon_fence_driver_force_completion(rdev, i); ring->ready = false; rdev->needs_reset = false; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 16807afab362..7784911d78ef 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -88,23 +88,6 @@ static void radeon_hotplug_work_func(struct work_struct *work) } /** - * radeon_irq_reset_work_func - execute gpu reset - * - * @work: work struct - * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. - */ -static void radeon_irq_reset_work_func(struct work_struct *work) -{ - struct radeon_device *rdev = container_of(work, struct radeon_device, - reset_work); - - radeon_gpu_reset(rdev); -} - -/** * radeon_driver_irq_preinstall_kms - drm irq preinstall callback * * @dev: drm dev pointer @@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev) INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func); rdev->irq.installed = true; r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq); @@ -342,6 +324,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) } /** + * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to enable + * + * Enables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring. + */ +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring) +{ + return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1; +} + +/** * radeon_irq_kms_sw_irq_put - disable software interrupt * * @rdev: radeon device pointer diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index eb7164d07985..8309b11e674d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c new file mode 100644 index 000000000000..a69bd441dd2d --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -0,0 +1,274 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König <christian.koenig@amd.com> + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/mmu_notifier.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "radeon.h" + +struct radeon_mn { + /* constant after initialisation */ + struct radeon_device *rdev; + struct mm_struct *mm; + struct mmu_notifier mn; + + /* only used on destruction */ + struct work_struct work; + + /* protected by rdev->mn_lock */ + struct hlist_node node; + + /* objects protected by lock */ + struct mutex lock; + struct rb_root objects; +}; + +/** + * radeon_mn_destroy - destroy the rmn + * + * @work: previously sheduled work item + * + * Lazy destroys the notifier from a work item + */ +static void radeon_mn_destroy(struct work_struct *work) +{ + struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); + struct radeon_device *rdev = rmn->rdev; + struct radeon_bo *bo, *next; + + mutex_lock(&rdev->mn_lock); + mutex_lock(&rmn->lock); + hash_del(&rmn->node); + rbtree_postorder_for_each_entry_safe(bo, next, &rmn->objects, mn_it.rb) { + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + } + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); + mmu_notifier_unregister(&rmn->mn, rmn->mm); + kfree(rmn); +} + +/** + * radeon_mn_release - callback to notify about mm destruction + * + * @mn: our notifier + * @mn: the mm this callback is about + * + * Shedule a work item to lazy destroy our notifier. + */ +static void radeon_mn_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + INIT_WORK(&rmn->work, radeon_mn_destroy); + schedule_work(&rmn->work); +} + +/** + * radeon_mn_invalidate_range_start - callback to notify about mm change + * + * @mn: our notifier + * @mn: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We block for all BOs between start and end to be idle and + * unmap them by move them into system domain again. + */ +static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct radeon_bo *bo; + struct fence *fence; + int r; + + bo = container_of(it, struct radeon_bo, mn_it); + it = interval_tree_iter_next(it, start, end); + + r = radeon_bo_reserve(bo, true); + if (r) { + DRM_ERROR("(%d) failed to reserve user bo\n", r); + continue; + } + + fence = reservation_object_get_excl(bo->tbo.resv); + if (fence) { + r = radeon_fence_wait((struct radeon_fence *)fence, false); + if (r) + DRM_ERROR("(%d) failed to wait for user bo\n", r); + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (r) + DRM_ERROR("(%d) failed to validate user bo\n", r); + + radeon_bo_unreserve(bo); + } + + mutex_unlock(&rmn->lock); +} + +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, +}; + +/** + * radeon_mn_get - create notifier context + * + * @rdev: radeon device pointer + * + * Creates a notifier context for current->mm. + */ +static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +{ + struct mm_struct *mm = current->mm; + struct radeon_mn *rmn; + int r; + + down_write(&mm->mmap_sem); + mutex_lock(&rdev->mn_lock); + + hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) + if (rmn->mm == mm) + goto release_locks; + + rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); + if (!rmn) { + rmn = ERR_PTR(-ENOMEM); + goto release_locks; + } + + rmn->rdev = rdev; + rmn->mm = mm; + rmn->mn.ops = &radeon_mn_ops; + mutex_init(&rmn->lock); + rmn->objects = RB_ROOT; + + r = __mmu_notifier_register(&rmn->mn, mm); + if (r) + goto free_rmn; + + hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); + +release_locks: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + + return rmn; + +free_rmn: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + kfree(rmn); + + return ERR_PTR(r); +} + +/** + * radeon_mn_register - register a BO for notifier updates + * + * @bo: radeon buffer object + * @addr: userptr addr we should monitor + * + * Registers an MMU notifier for the given BO at the specified address. + * Returns 0 on success, -ERRNO if anything goes wrong. + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) +{ + unsigned long end = addr + radeon_bo_size(bo) - 1; + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + struct interval_tree_node *it; + + rmn = radeon_mn_get(rdev); + if (IS_ERR(rmn)) + return PTR_ERR(rmn); + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, addr, end); + if (it) { + mutex_unlock(&rmn->lock); + return -EEXIST; + } + + bo->mn = rmn; + bo->mn_it.start = addr; + bo->mn_it.last = end; + interval_tree_insert(&bo->mn_it, &rmn->objects); + + mutex_unlock(&rmn->lock); + + return 0; +} + +/** + * radeon_mn_unregister - unregister a BO for notifier updates + * + * @bo: radeon buffer object + * + * Remove any registration of MMU notifier updates from the buffer object. + */ +void radeon_mn_unregister(struct radeon_bo *bo) +{ + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + + mutex_lock(&rdev->mn_lock); + rmn = bo->mn; + if (rmn == NULL) { + mutex_unlock(&rdev->mn_lock); + return; + } + + mutex_lock(&rmn->lock); + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); +} diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 480c87d8edc5..aadbd36e64b9 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -75,6 +75,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); @@ -96,40 +97,56 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) { u32 c = 0, i; - rbo->placement.fpfn = 0; - rbo->placement.lpfn = 0; rbo->placement.placement = rbo->placements; rbo->placement.busy_placement = rbo->placements; if (domain & RADEON_GEM_DOMAIN_VRAM) - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_VRAM; + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_VRAM; + if (domain & RADEON_GEM_DOMAIN_GTT) { if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_TT; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || (rbo->rdev->flags & RADEON_IS_AGP)) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_TT; } } + if (domain & RADEON_GEM_DOMAIN_CPU) { if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_SYSTEM; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || rbo->rdev->flags & RADEON_IS_AGP) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_SYSTEM; } } if (!c) - rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_MASK_CACHING | + TTM_PL_FLAG_SYSTEM; + rbo->placement.num_placement = c; rbo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + rbo->placements[i].fpfn = 0; + rbo->placements[i].lpfn = 0; + } + /* * Use two-ended allocation depending on the buffer size to * improve fragmentation quality. @@ -137,7 +154,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) */ if (rbo->tbo.mem.size > 512 * 1024) { for (i = 0; i < c; i++) { - rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN; + rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; } } } @@ -264,6 +281,9 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, { int r, i; + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return -EPERM; + if (bo->pin_count) { bo->pin_count++; if (gpu_addr) @@ -283,21 +303,22 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, return 0; } radeon_ttm_placement_from_domain(bo, domain); - if (domain == RADEON_GEM_DOMAIN_VRAM) { + for (i = 0; i < bo->placement.num_placement; i++) { + unsigned lpfn = 0; + /* force to pin into visible video ram */ - bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; - } - if (max_offset) { - u64 lpfn = max_offset >> PAGE_SHIFT; + if (bo->placements[i].flags & TTM_PL_FLAG_VRAM) + lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; + else + lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; /* ??? */ - if (!bo->placement.lpfn) - bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; + if (max_offset) + lpfn = min (lpfn, (unsigned)(max_offset >> PAGE_SHIFT)); - if (lpfn < bo->placement.lpfn) - bo->placement.lpfn = lpfn; + bo->placements[i].lpfn = lpfn; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; @@ -329,8 +350,10 @@ int radeon_bo_unpin(struct radeon_bo *bo) bo->pin_count--; if (bo->pin_count) return 0; - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + for (i = 0; i < bo->placement.num_placement; i++) { + bo->placements[i].lpfn = 0; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; + } r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { if (bo->tbo.mem.mem_type == TTM_PL_VRAM) @@ -459,7 +482,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); - r = ttm_eu_reserve_buffers(ticket, head); + r = ttm_eu_reserve_buffers(ticket, head, true); if (unlikely(r != 0)) { return r; } @@ -468,6 +491,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->prefered_domains; + u32 allowed = lobj->allowed_domains; u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); @@ -479,7 +503,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, * into account. We don't want to disallow buffer moves * completely. */ - if ((lobj->allowed_domains & current_domain) != 0 && + if ((allowed & current_domain) != 0 && (domain & current_domain) == 0 && /* will be moved */ bytes_moved > bytes_moved_threshold) { /* don't move it */ @@ -489,7 +513,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, allowed); initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); @@ -731,7 +755,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); - rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; + rbo->placements[0].lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; r = ttm_bo_validate(bo, &rbo->placement, false, false); if (unlikely(r == -ENOMEM)) { radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); @@ -755,12 +779,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; - if (bo->tbo.sync_obj) - r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); + + r = ttm_bo_wait(&bo->tbo, true, true, no_wait); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 7fd665a22908..32522cc940a1 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -460,10 +460,6 @@ static ssize_t radeon_get_dpm_state(struct device *dev, struct radeon_device *rdev = ddev->dev_private; enum radeon_pm_state_type pm = rdev->pm.dpm.user_state; - if ((rdev->flags & RADEON_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return snprintf(buf, PAGE_SIZE, "off\n"); - return snprintf(buf, PAGE_SIZE, "%s\n", (pm == POWER_STATE_TYPE_BATTERY) ? "battery" : (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); @@ -477,11 +473,6 @@ static ssize_t radeon_set_dpm_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; - /* Can't set dpm state when the card is off */ - if ((rdev->flags & RADEON_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) - return -EINVAL; - mutex_lock(&rdev->pm.mutex); if (strncmp("battery", buf, strlen("battery")) == 0) rdev->pm.dpm.user_state = POWER_STATE_TYPE_BATTERY; @@ -495,7 +486,12 @@ static ssize_t radeon_set_dpm_state(struct device *dev, goto fail; } mutex_unlock(&rdev->pm.mutex); - radeon_pm_compute_clocks(rdev); + + /* Can't set dpm state when the card is off */ + if (!(rdev->flags & RADEON_IS_PX) || + (ddev->switch_power_state == DRM_SWITCH_POWER_ON)) + radeon_pm_compute_clocks(rdev); + fail: return count; } diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index f7e48d329db3..d5414d42e44b 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -103,3 +103,21 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj) radeon_bo_unpin(bo); radeon_bo_unreserve(bo); } + + +struct reservation_object *radeon_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct radeon_bo *bo = gem_to_radeon_bo(obj); + + return bo->tbo.resv; +} + +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags) +{ + struct radeon_bo *bo = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return ERR_PTR(-EPERM); + return drm_gem_prime_export(dev, gobj, flags); +} diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 5b4e0cf231a0..6f2a9bd6bb54 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -45,27 +45,6 @@ static int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring); /** - * radeon_ring_write - write a value to the ring - * - * @ring: radeon_ring structure holding ring information - * @v: dword (dw) value to write - * - * Write a value to the requested ring buffer (all asics). - */ -void radeon_ring_write(struct radeon_ring *ring, uint32_t v) -{ -#if DRM_DEBUG_CODE - if (ring->count_dw <= 0) { - DRM_ERROR("radeon: writing more dwords to the ring than expected!\n"); - } -#endif - ring->ring[ring->wptr++] = v; - ring->wptr &= ring->ptr_mask; - ring->count_dw--; - ring->ring_free_dw--; -} - -/** * radeon_ring_supports_scratch_reg - check if the ring supports * writing to scratch registers * @@ -177,16 +156,18 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information + * @hdp_flush: Whether or not to perform an HDP cache flush * * Update the wptr (write pointer) to tell the GPU to * execute new commands on the ring buffer (all asics). */ -void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, + bool hdp_flush) { /* If we are emitting the HDP flush via the ring buffer, we need to * do it before padding. */ - if (rdev->asic->ring[ring->idx]->hdp_flush) + if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush) rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); /* We pad to match fetch size */ while (ring->wptr & ring->align_mask) { @@ -196,7 +177,7 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) /* If we are emitting the HDP flush via MMIO, we need to do it after * all CPU writes to VRAM finished. */ - if (rdev->asic->mmio_hdp_flush) + if (hdp_flush && rdev->asic->mmio_hdp_flush) rdev->asic->mmio_hdp_flush(rdev); radeon_ring_set_wptr(rdev, ring); } @@ -207,12 +188,14 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information + * @hdp_flush: Whether or not to perform an HDP cache flush * * Call radeon_ring_commit() then unlock the ring (all asics). */ -void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring) +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring, + bool hdp_flush) { - radeon_ring_commit(rdev, ring); + radeon_ring_commit(rdev, ring, hdp_flush); mutex_unlock(&rdev->ring_lock); } @@ -372,7 +355,7 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, data[i]); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); kfree(data); return 0; } @@ -400,9 +383,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, - (rdev->flags & RADEON_IS_PCIE) ? - RADEON_GEM_GTT_WC : 0, + RADEON_GEM_DOMAIN_GTT, 0, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index dbd6bcde92de..56d9fd66d8ae 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -179,7 +179,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, continue; } - radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_ring_commit(rdev, &rdev->ring[i], false); radeon_fence_note_sync(fence, ring); semaphore->gpu_addr += 8; diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 23bb64fd775f..535403e0c8a2 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -30,9 +30,9 @@ */ #include <drm/drmP.h> -#include <drm/drm_buffer.h> #include <drm/radeon_drm.h> #include "radeon_drv.h" +#include "drm_buffer.h" /* ================================================================ * Helper functions for client state checking and fixup diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 5adf4207453d..17bc3dced9f1 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -288,7 +288,7 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, return r; } radeon_fence_emit(rdev, fence, ring->idx); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } return 0; } @@ -313,7 +313,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1); if (r) @@ -325,7 +325,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2); if (r) @@ -344,7 +344,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_fence_wait(fence1, false); if (r) { @@ -365,7 +365,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_fence_wait(fence2, false); if (r) { @@ -408,7 +408,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); if (r) @@ -420,7 +420,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); if (r) goto out_cleanup; @@ -442,7 +442,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringC); + radeon_ring_unlock_commit(rdev, ringC, false); for (i = 0; i < 30; ++i) { mdelay(100); @@ -468,7 +468,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringC); + radeon_ring_unlock_commit(rdev, ringC, false); mdelay(1000); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 72afe82a95c9..62d1f4d730a2 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -39,6 +39,8 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swiotlb.h> +#include <linux/swap.h> +#include <linux/pagemap.h> #include <linux/debugfs.h> #include "radeon_reg.h" #include "radeon.h" @@ -176,12 +178,15 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, static void radeon_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM + }; + struct radeon_bo *rbo; - static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!radeon_ttm_bo_is_radeon_bo(bo)) { - placement->fpfn = 0; - placement->lpfn = 0; placement->placement = &placements; placement->busy_placement = &placements; placement->num_placement = 1; @@ -265,12 +270,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0); /* sync other rings */ - fence = bo->sync_obj; + fence = (struct radeon_fence *)reservation_object_get_excl(bo->resv); r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ &fence); /* FIXME: handle copy error */ - r = ttm_bo_move_accel_cleanup(bo, (void *)fence, + r = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, no_wait_gpu, new_mem); radeon_fence_unref(&fence); return r; @@ -284,20 +289,20 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, struct radeon_device *rdev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; - u32 placements; + struct ttm_place placements; struct ttm_placement placement; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { @@ -332,19 +337,19 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; - u32 placements; + struct ttm_place placements; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { @@ -483,39 +488,108 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re { } -static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible) -{ - return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible); -} +/* + * TTM backend functions. + */ +struct radeon_ttm_tt { + struct ttm_dma_tt ttm; + struct radeon_device *rdev; + u64 offset; -static int radeon_sync_obj_flush(void *sync_obj) + uint64_t userptr; + struct mm_struct *usermm; + uint32_t userflags; +}; + +/* prepare the sg table with the user pages */ +static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) { + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + unsigned pinned = 0, nents; + int r; + + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + if (current->mm != gtt->usermm) + return -EPERM; + + if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) { + /* check that we only pin down anonymous memory + to prevent problems with writeback */ + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma; + vma = find_vma(gtt->usermm, gtt->userptr); + if (!vma || vma->vm_file || vma->vm_end < end) + return -EPERM; + } + + do { + unsigned num_pages = ttm->num_pages - pinned; + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; + struct page **pages = ttm->pages + pinned; + + r = get_user_pages(current, current->mm, userptr, num_pages, + write, 0, pages, NULL); + if (r < 0) + goto release_pages; + + pinned += r; + + } while (pinned < ttm->num_pages); + + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, + ttm->num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (r) + goto release_sg; + + r = -ENOMEM; + nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + if (nents != ttm->sg->nents) + goto release_sg; + + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, + gtt->ttm.dma_address, ttm->num_pages); + return 0; -} -static void radeon_sync_obj_unref(void **sync_obj) -{ - radeon_fence_unref((struct radeon_fence **)sync_obj); -} +release_sg: + kfree(ttm->sg); -static void *radeon_sync_obj_ref(void *sync_obj) -{ - return radeon_fence_ref((struct radeon_fence *)sync_obj); +release_pages: + release_pages(ttm->pages, pinned, 0); + return r; } -static bool radeon_sync_obj_signaled(void *sync_obj) +static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) { - return radeon_fence_signaled((struct radeon_fence *)sync_obj); -} + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + struct scatterlist *sg; + int i; -/* - * TTM backend functions. - */ -struct radeon_ttm_tt { - struct ttm_dma_tt ttm; - struct radeon_device *rdev; - u64 offset; -}; + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + /* free the sg table and pages again */ + dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + + for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) { + struct page *page = sg_page(sg); + + if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY)) + set_page_dirty(page); + + mark_page_accessed(page); + page_cache_release(page); + } + + sg_free_table(ttm->sg); +} static int radeon_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) @@ -525,6 +599,11 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm, RADEON_GART_PAGE_WRITE; int r; + if (gtt->userptr) { + radeon_ttm_tt_pin_userptr(ttm); + flags &= ~RADEON_GART_PAGE_WRITE; + } + gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", @@ -547,6 +626,10 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm) struct radeon_ttm_tt *gtt = (void *)ttm; radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages); + + if (gtt->userptr) + radeon_ttm_tt_unpin_userptr(ttm); + return 0; } @@ -603,6 +686,16 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) if (ttm->state != tt_unpopulated) return 0; + if (gtt->userptr) { + ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + if (!ttm->sg) + return -ENOMEM; + + ttm->page_flags |= TTM_PAGE_FLAG_SG; + ttm->state = tt_unbound; + return 0; + } + if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -652,6 +745,12 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) unsigned i; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + if (gtt->userptr) { + kfree(ttm->sg); + ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + return; + } + if (slave) return; @@ -680,6 +779,40 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } +int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return -EINVAL; + + gtt->userptr = addr; + gtt->usermm = current->mm; + gtt->userflags = flags; + return 0; +} + +bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!gtt->userptr; +} + +bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!(gtt->userflags & RADEON_GEM_USERPTR_READONLY); +} + static struct ttm_bo_driver radeon_bo_driver = { .ttm_tt_create = &radeon_ttm_tt_create, .ttm_tt_populate = &radeon_ttm_tt_populate, @@ -689,11 +822,6 @@ static struct ttm_bo_driver radeon_bo_driver = { .evict_flags = &radeon_evict_flags, .move = &radeon_bo_move, .verify_access = &radeon_verify_access, - .sync_obj_signaled = &radeon_sync_obj_signaled, - .sync_obj_wait = &radeon_sync_obj_wait, - .sync_obj_flush = &radeon_sync_obj_flush, - .sync_obj_unref = &radeon_sync_obj_unref, - .sync_obj_ref = &radeon_sync_obj_ref, .move_notify = &radeon_bo_move_notify, .fault_reserve_notify = &radeon_bo_fault_reserve_notify, .io_mem_reserve = &radeon_ttm_io_mem_reserve, diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6bf55ec85b62..ba4f38916026 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -40,12 +40,18 @@ #define UVD_IDLE_TIMEOUT_MS 1000 /* Firmware Names */ +#define FIRMWARE_R600 "radeon/R600_uvd.bin" +#define FIRMWARE_RS780 "radeon/RS780_uvd.bin" +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin" +MODULE_FIRMWARE(FIRMWARE_R600); +MODULE_FIRMWARE(FIRMWARE_RS780); +MODULE_FIRMWARE(FIRMWARE_RV770); MODULE_FIRMWARE(FIRMWARE_RV710); MODULE_FIRMWARE(FIRMWARE_CYPRESS); MODULE_FIRMWARE(FIRMWARE_SUMO); @@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + fw_name = FIRMWARE_R600; + break; + + case CHIP_RS780: + case CHIP_RS880: + fw_name = FIRMWARE_RS780; + break; + + case CHIP_RV770: + fw_name = FIRMWARE_RV770; + break; + case CHIP_RV710: case CHIP_RV730: case CHIP_RV740: @@ -115,7 +138,8 @@ int radeon_uvd_init(struct radeon_device *rdev) } bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + - RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo); if (r) { @@ -231,10 +255,30 @@ int radeon_uvd_resume(struct radeon_device *rdev) return 0; } -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains) { - rbo->placement.fpfn = 0 >> PAGE_SHIFT; - rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + int i; + + for (i = 0; i < rbo->placement.num_placement; ++i) { + rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; + rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + } + + /* If it must be in VRAM it must be in the first segment as well */ + if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) + return; + + /* abort if we already have more than one placement */ + if (rbo->placement.num_placement > 1) + return; + + /* add another 256MB segment */ + rbo->placements[1] = rbo->placements[0]; + rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placement.num_placement++; + rbo->placement.num_busy_placement++; } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) @@ -356,6 +400,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, { int32_t *msg, msg_type, handle; unsigned img_size = 0; + struct fence *f; void *ptr; int i, r; @@ -365,8 +410,9 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; } - if (bo->tbo.sync_obj) { - r = radeon_fence_wait(bo->tbo.sync_obj, false); + f = reservation_object_get_excl(bo->tbo.resv); + if (f) { + r = radeon_fence_wait((struct radeon_fence *)f, false); if (r) { DRM_ERROR("Failed waiting for UVD message (%d)!\n", r); return r; @@ -604,38 +650,16 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p) } static int radeon_uvd_send_msg(struct radeon_device *rdev, - int ring, struct radeon_bo *bo, + int ring, uint64_t addr, struct radeon_fence **fence) { - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; struct radeon_ib ib; - uint64_t addr; int i, r; - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head); - if (r) - return r; - - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - radeon_uvd_force_into_uvd_segment(bo); - - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) - goto err; - r = radeon_ib_get(rdev, ring, &ib, NULL, 64); if (r) - goto err; + return r; - addr = radeon_bo_gpu_offset(bo); ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); ib.ptr[1] = addr; ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); @@ -646,20 +670,12 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.ptr[i] = PACKET2(0); ib.length_dw = 16; - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) - goto err; - ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (fence) *fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib); - radeon_bo_unref(&bo); - return 0; - -err: - ttm_eu_backoff_reservation(&ticket, &head); return r; } @@ -669,27 +685,18 @@ err: int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; - int r, i; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); - if (r) - return r; + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } + int r, i; - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); + if (r) return r; - } /* stitch together an UVD create msg */ msg[0] = cpu_to_le32(0x00000de4); @@ -706,36 +713,26 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, for (i = 11; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; - int r, i; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); - if (r) - return r; + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } + int r, i; - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); + if (r) return r; - } /* stitch together an UVD destroy msg */ msg[0] = cpu_to_le32(0x00000de4); @@ -745,10 +742,9 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = 4; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } /** diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index f9b70a43aa52..c7190aadbd89 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -368,7 +368,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } @@ -425,7 +425,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } @@ -715,7 +715,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } radeon_ring_write(ring, VCE_CMD_END); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { if (vce_v1_0_get_rptr(rdev, ring) != rptr) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ccae4d9dc3de..671ee566aa51 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -399,7 +399,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, INIT_LIST_HEAD(&head); list_add(&tv.head, &head); - r = ttm_eu_reserve_buffers(&ticket, &head); + r = ttm_eu_reserve_buffers(&ticket, &head, true); if (r) return r; @@ -420,11 +420,11 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > 64); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) goto error; - ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base); radeon_ib_free(rdev, &ib); return 0; @@ -483,6 +483,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, /* add a clone of the bo_va to clear the old address */ struct radeon_bo_va *tmp; tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + return -ENOMEM; + } tmp->it.start = bo_va->it.start; tmp->it.last = bo_va->it.last; tmp->vm = vm; @@ -689,11 +693,17 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, incr, R600_PTE_VALID); if (ib.length_dw != 0) { + struct fence *fence; + radeon_asic_vm_pad_ib(rdev, &ib); - radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj); + + fence = reservation_object_get_excl(pd->tbo.resv); + radeon_semaphore_sync_to(ib.semaphore, + (struct radeon_fence *)fence); + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); WARN_ON(ib.length_dw > ndw); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); return r; @@ -816,8 +826,11 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, struct radeon_bo *pt = vm->page_tables[pt_idx].bo; unsigned nptes; uint64_t pte; + struct fence *fence; - radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj); + fence = reservation_object_get_excl(pt->tbo.resv); + radeon_semaphore_sync_to(ib->semaphore, + (struct radeon_fence *)fence); if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; @@ -888,6 +901,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, bo_va->flags &= ~RADEON_VM_PAGE_VALID; bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; + if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm)) + bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; + if (mem) { addr = mem->start << PAGE_SHIFT; if (mem->mem_type != TTM_PL_SYSTEM) { @@ -957,7 +973,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, WARN_ON(ib.length_dw > ndw); radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); return r; diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 3e21e869015f..8a477bf1fdb3 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -124,7 +124,7 @@ void rv515_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST); radeon_ring_write(ring, PACKET0(0x20C8, 0)); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } int rv515_mc_wait_for_idle(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index bbf2e076ee45..74426ac2bb5c 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -90,7 +90,7 @@ int rv770_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 011779bd2b3d..a1274a31405c 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3057,7 +3057,7 @@ static void si_gpu_init(struct radeon_device *rdev) u32 sx_debug_1; u32 hdp_host_path_cntl; u32 tmp; - int i, j, k; + int i, j; switch (rdev->family) { case CHIP_TAHITI: @@ -3255,12 +3255,11 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.max_sh_per_se, rdev->config.si.max_cu_per_sh); + rdev->config.si.active_cus = 0; for (i = 0; i < rdev->config.si.max_shader_engines; i++) { for (j = 0; j < rdev->config.si.max_sh_per_se; j++) { - for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) { - rdev->config.si.active_cus += - hweight32(si_get_cu_active_bitmap(rdev, i, j)); - } + rdev->config.si.active_cus += + hweight32(si_get_cu_active_bitmap(rdev, i, j)); } } @@ -3541,7 +3540,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); radeon_ring_write(ring, 0xc000); radeon_ring_write(ring, 0xe000); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); si_cp_enable(rdev, true); @@ -3570,7 +3569,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) { ring = &rdev->ring[i]; @@ -3580,7 +3579,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0)); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } return 0; @@ -5028,7 +5027,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* flush hdp cache */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2); radeon_ring_write(ring, 0); @@ -5036,7 +5035,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 716505129450..7c22baaf94db 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -275,7 +275,7 @@ int si_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 32e50be9c4ac..57f780053b3e 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1874,16 +1874,22 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) pi->at[i] = TRINITY_AT_DFLT; - /* There are stability issues reported on with - * bapm enabled when switching between AC and battery - * power. At the same time, some MSI boards hang - * if it's not enabled and dpm is enabled. Just enable - * it for MSI boards right now. - */ - if (rdev->pdev->subsystem_vendor == 0x1462) - pi->enable_bapm = true; - else + if (radeon_bapm == -1) { + /* There are stability issues reported on with + * bapm enabled when switching between AC and battery + * power. At the same time, some MSI boards hang + * if it's not enabled and dpm is enabled. Just enable + * it for MSI boards right now. + */ + if (rdev->pdev->subsystem_vendor == 0x1462) + pi->enable_bapm = true; + else + pi->enable_bapm = false; + } else if (radeon_bapm == 0) { pi->enable_bapm = false; + } else { + pi->enable_bapm = true; + } pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index be42c8125203..e72b3cb59358 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -22,6 +22,7 @@ * Authors: Christian König <christian.koenig@amd.com> */ +#include <linux/firmware.h> #include <drm/drmP.h> #include "radeon.h" #include "radeon_asic.h" @@ -70,6 +71,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev, } /** + * uvd_v1_0_fence_emit - emit an fence & trap command + * + * @rdev: radeon_device pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + +/** + * uvd_v1_0_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v1_0_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = (rdev->uvd.gpu_addr >> 3) + 16; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr)); + + return 0; +} + +/** * uvd_v1_0_init - start and test UVD block * * @rdev: radeon_device pointer @@ -124,14 +201,38 @@ int uvd_v1_0_init(struct radeon_device *rdev) radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); radeon_ring_write(ring, 3); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); done: /* lower clocks again */ radeon_set_uvd_clocks(rdev, 0, 0); - if (!r) + if (!r) { + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV620: + /* 64byte granularity workaround */ + WREG32(MC_CONFIG, 0); + WREG32(MC_CONFIG, 1 << 4); + WREG32(RS_DQ_RD_RET_CONF, 0x3f); + WREG32(MC_CONFIG, 0x1f); + + /* fall through */ + case CHIP_RV670: + case CHIP_RV635: + + /* write clean workaround */ + WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10); + break; + + default: + /* TODO: Do we need more? */ + break; + } + DRM_INFO("UVD initialized successfully.\n"); + } return r; } @@ -218,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* enable UMC */ WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + /* boot up the VCPU */ WREG32(UVD_SOFT_RESET, 0); mdelay(10); - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { @@ -331,7 +432,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) } radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(UVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c index 8bfdadd56598..89193519f8a1 100644 --- a/drivers/gpu/drm/radeon/uvd_v2_2.c +++ b/drivers/gpu/drm/radeon/uvd_v2_2.c @@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev) uint32_t chip_id, size; int r; + /* RV770 uses V1.0 MC */ + if (rdev->family == CHIP_RV770) + return uvd_v1_0_resume(rdev); + r = radeon_uvd_resume(rdev); if (r) return r; |