diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-16 07:42:10 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-16 07:42:10 +0300 |
commit | e60e1ee60630cafef5e430c2ae364877e061d980 (patch) | |
tree | 816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2 /drivers/gpu/drm/etnaviv/etnaviv_gpu.c | |
parent | 5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a (diff) | |
parent | f150891fd9878ef0d9197c4e8451ce67c3bdd014 (diff) | |
download | linux-e60e1ee60630cafef5e430c2ae364877e061d980.tar.xz |
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main drm pull request for v4.15.
Core:
- Atomic object lifetime fixes
- Atomic iterator improvements
- Sparse/smatch fixes
- Legacy kms ioctls to be interruptible
- EDID override improvements
- fb/gem helper cleanups
- Simple outreachy patches
- Documentation improvements
- Fix dma-buf rcu races
- DRM mode object leasing for improving VR use cases.
- vgaarb improvements for non-x86 platforms.
New driver:
- tve200: Faraday Technology TVE200 block.
This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
the StorLink SL3516 (later Cortina Systems CS3516) as well as the
Grain Media GM8180.
New bridges:
- SiI9234 support
New panels:
- S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
LT089AC19000, Innolux AT043TN24
i915:
- Remove Coffeelake from alpha support
- Cannonlake workarounds
- Infoframe refactoring for DisplayPort
- VBT updates
- DisplayPort vswing/emph/buffer translation refactoring
- CCS fixes
- Restore GPU clock boost on missed vblanks
- Scatter list updates for userptr allocations
- Gen9+ transition watermarks
- Display IPC (Isochronous Priority Control)
- Private PAT management
- GVT: improved error handling and pci config sanitizing
- Execlist refactoring
- Transparent Huge Page support
- User defined priorities support
- HuC/GuC firmware refactoring
- DP MST fixes
- eDP power sequencing fixes
- Use RCU instead of stop_machine
- PSR state tracking support
- Eviction fixes
- BDW DP aux channel timeout fixes
- LSPCON fixes
- Cannonlake PLL fixes
amdgpu:
- Per VM BO support
- Powerplay cleanups
- CI powerplay support
- PASID mgr for kfd
- SR-IOV fixes
- initial GPU reset for vega10
- Prime mmap support
- TTM updates
- Clock query interface for Raven
- Fence to handle ioctl
- UVD encode ring support on Polaris
- Transparent huge page DMA support
- Compute LRU pipe tweaks
- BO flag to allow buffers to opt out of implicit sync
- CTX priority setting API
- VRAM lost infrastructure plumbing
qxl:
- fix flicker since atomic rework
amdkfd:
- Further improvements from internal AMD tree
- Usermode events
- Drop radeon support
nouveau:
- Pascal temperature sensor support
- Improved BAR2 handling
- MMU rework to support Pascal MMU
exynos:
- Improved HDMI/mixer support
- HDMI audio interface support
tegra:
- Prep work for tegra186
- Cleanup/fixes
msm:
- Preemption support for a5xx
- Display fixes for 8x96 (snapdragon 820)
- Async cursor plane fixes
- FW loading rework
- GPU debugging improvements
vc4:
- Prep for DSI panels
- fix T-format tiling scanout
- New madvise ioctl
Rockchip:
- LVDS support
omapdrm:
- omap4 HDMI CEC support
etnaviv:
- GPU performance counters groundwork
sun4i:
- refactor driver load + TCON backend
- HDMI improvements
- A31 support
- Misc fixes
udl:
- Probe/EDID read fixes.
tilcdc:
- Misc fixes.
pl111:
- Support more variants
adv7511:
- Improve EDID handling.
- HDMI CEC support
sii8620:
- Add remote control support"
* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
drm/rockchip: analogix_dp: Use mutex rather than spinlock
drm/mode_object: fix documentation for object lookups.
drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
drm/i915: Move init_clock_gating() back to where it was
drm/i915: Prune the reservation shared fence array
drm/i915: Idle the GPU before shinking everything
drm/i915: Lock llist_del_first() vs llist_del_all()
drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
drm/i915: Disable lazy PPGTT page table optimization for vGPU
drm/i915/execlists: Remove the priority "optimisation"
drm/i915: Filter out spurious execlists context-switch interrupts
drm/amdgpu: use irq-safe lock for kiq->ring_lock
drm/amdgpu: bypass lru touch for KIQ ring submission
drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
drm/amd/powerplay: initialize a variable before using it
drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
drm/rockchip: add CONFIG_OF dependency for lvds
...
Diffstat (limited to 'drivers/gpu/drm/etnaviv/etnaviv_gpu.c')
-rw-r--r-- | drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 217 |
1 files changed, 167 insertions, 50 deletions
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 4b152e0d31a6..e19cbe05da2a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -25,6 +25,7 @@ #include "etnaviv_gpu.h" #include "etnaviv_gem.h" #include "etnaviv_mmu.h" +#include "etnaviv_perfmon.h" #include "common.xml.h" #include "state.xml.h" #include "state_hi.xml.h" @@ -420,9 +421,10 @@ static void etnaviv_gpu_update_clock(struct etnaviv_gpu *gpu) gpu->base_rate_shader >> gpu->freq_scale); } else { unsigned int fscale = 1 << (6 - gpu->freq_scale); - u32 clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS | - VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale); + u32 clock = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + clock &= ~VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK; + clock |= VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale); etnaviv_gpu_load_clock(gpu, clock); } } @@ -433,24 +435,14 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) unsigned long timeout; bool failed = true; - /* TODO - * - * - clock gating - * - puls eater - * - what about VG? - */ - /* We hope that the GPU resets in under one second */ timeout = jiffies + msecs_to_jiffies(1000); while (time_is_after_jiffies(timeout)) { /* enable clock */ - etnaviv_gpu_update_clock(gpu); - - control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); - - /* Wait for stable clock. Vivante's code waited for 1ms */ - usleep_range(1000, 10000); + unsigned int fscale = 1 << (6 - gpu->freq_scale); + control = VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale); + etnaviv_gpu_load_clock(gpu, control); /* isolate the GPU. */ control |= VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU; @@ -461,7 +453,7 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control); /* wait for reset. */ - msleep(1); + usleep_range(10, 20); /* reset soft reset bit. */ control &= ~VIVS_HI_CLOCK_CONTROL_SOFT_RESET; @@ -490,6 +482,10 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) continue; } + /* disable debug registers, as they are not normally needed */ + control |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS; + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control); + failed = false; break; } @@ -721,7 +717,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) } /* Create buffer: */ - gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0); + gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0); if (!gpu->buffer) { ret = -ENOMEM; dev_err(gpu->dev, "could not create command buffer\n"); @@ -739,10 +735,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) /* Setup event management */ spin_lock_init(&gpu->event_spinlock); init_completion(&gpu->event_free); - for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { - gpu->event[i].used = false; + bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS); + for (i = 0; i < ARRAY_SIZE(gpu->event); i++) complete(&gpu->event_free); - } /* Now program the hardware */ mutex_lock(&gpu->lock); @@ -926,7 +921,7 @@ static void recover_worker(struct work_struct *work) struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, recover_work); unsigned long flags; - unsigned int i; + unsigned int i = 0; dev_err(gpu->dev, "hangcheck recover!\n"); @@ -945,14 +940,12 @@ static void recover_worker(struct work_struct *work) /* complete all events, the GPU won't do it after the reset */ spin_lock_irqsave(&gpu->event_spinlock, flags); - for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { - if (!gpu->event[i].used) - continue; + for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS) { dma_fence_signal(gpu->event[i].fence); gpu->event[i].fence = NULL; - gpu->event[i].used = false; complete(&gpu->event_free); } + bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS); spin_unlock_irqrestore(&gpu->event_spinlock, flags); gpu->completed_fence = gpu->active_fence; @@ -1140,30 +1133,45 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, * event management: */ -static unsigned int event_alloc(struct etnaviv_gpu *gpu) +static int event_alloc(struct etnaviv_gpu *gpu, unsigned nr_events, + unsigned int *events) { - unsigned long ret, flags; - unsigned int i, event = ~0U; + unsigned long flags, timeout = msecs_to_jiffies(10 * 10000); + unsigned i, acquired = 0; - ret = wait_for_completion_timeout(&gpu->event_free, - msecs_to_jiffies(10 * 10000)); - if (!ret) - dev_err(gpu->dev, "wait_for_completion_timeout failed"); + for (i = 0; i < nr_events; i++) { + unsigned long ret; - spin_lock_irqsave(&gpu->event_spinlock, flags); + ret = wait_for_completion_timeout(&gpu->event_free, timeout); - /* find first free event */ - for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { - if (gpu->event[i].used == false) { - gpu->event[i].used = true; - event = i; - break; + if (!ret) { + dev_err(gpu->dev, "wait_for_completion_timeout failed"); + goto out; } + + acquired++; + timeout = ret; + } + + spin_lock_irqsave(&gpu->event_spinlock, flags); + + for (i = 0; i < nr_events; i++) { + int event = find_first_zero_bit(gpu->event_bitmap, ETNA_NR_EVENTS); + + events[i] = event; + memset(&gpu->event[event], 0, sizeof(struct etnaviv_event)); + set_bit(event, gpu->event_bitmap); } spin_unlock_irqrestore(&gpu->event_spinlock, flags); - return event; + return 0; + +out: + for (i = 0; i < acquired; i++) + complete(&gpu->event_free); + + return -EBUSY; } static void event_free(struct etnaviv_gpu *gpu, unsigned int event) @@ -1172,12 +1180,12 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event) spin_lock_irqsave(&gpu->event_spinlock, flags); - if (gpu->event[event].used == false) { + if (!test_bit(event, gpu->event_bitmap)) { dev_warn(gpu->dev, "event %u is already marked as free", event); spin_unlock_irqrestore(&gpu->event_spinlock, flags); } else { - gpu->event[event].used = false; + clear_bit(event, gpu->event_bitmap); spin_unlock_irqrestore(&gpu->event_spinlock, flags); complete(&gpu->event_free); @@ -1311,12 +1319,71 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) pm_runtime_put_autosuspend(gpu->dev); } +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, + struct etnaviv_event *event, unsigned int flags) +{ + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + unsigned int i; + + for (i = 0; i < cmdbuf->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + + if (pmr->flags == flags) + etnaviv_perfmon_process(gpu, pmr); + } +} + +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + u32 val; + + /* disable clock gating */ + val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); + val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; + gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val); + + /* enable debug register */ + val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + val &= ~VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS; + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val); + + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); +} + +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + unsigned int i; + u32 val; + + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); + + for (i = 0; i < cmdbuf->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + + *pmr->bo_vma = pmr->sequence; + } + + /* disable debug register */ + val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + val |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS; + gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val); + + /* enable clock gating */ + val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS); + val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING; + gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val); +} + + /* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { struct dma_fence *fence; - unsigned int event, i; + unsigned int i, nr_events = 1, event[3]; int ret; ret = etnaviv_gpu_pm_get_sync(gpu); @@ -1332,10 +1399,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, * */ - event = event_alloc(gpu); - if (unlikely(event == ~0U)) { - DRM_ERROR("no free event\n"); - ret = -EBUSY; + /* + * if there are performance monitor requests we need to have + * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE + * requests. + * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests + * and update the sequence number for userspace. + */ + if (cmdbuf->nr_pmrs) + nr_events = 3; + + ret = event_alloc(gpu, nr_events, event); + if (ret) { + DRM_ERROR("no free events\n"); goto out_pm_put; } @@ -1343,12 +1419,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { - event_free(gpu, event); + for (i = 0; i < nr_events; i++) + event_free(gpu, event[i]); + ret = -ENOMEM; goto out_unlock; } - gpu->event[event].fence = fence; + gpu->event[event[0]].fence = fence; submit->fence = dma_fence_get(fence); gpu->active_fence = submit->fence->seqno; @@ -1358,7 +1436,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, gpu->lastctx = cmdbuf->ctx; } - etnaviv_buffer_queue(gpu, event, cmdbuf); + if (cmdbuf->nr_pmrs) { + gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; + gpu->event[event[1]].cmdbuf = cmdbuf; + etnaviv_sync_point_queue(gpu, event[1]); + } + + etnaviv_buffer_queue(gpu, event[0], cmdbuf); + + if (cmdbuf->nr_pmrs) { + gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; + gpu->event[event[2]].cmdbuf = cmdbuf; + etnaviv_sync_point_queue(gpu, event[2]); + } cmdbuf->fence = fence; list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); @@ -1394,6 +1484,24 @@ out_pm_put: return ret; } +static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu, + struct etnaviv_event *event) +{ + u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); + + event->sync_point(gpu, event); + etnaviv_gpu_start_fe(gpu, addr + 2, 2); +} + +static void sync_point_worker(struct work_struct *work) +{ + struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, + sync_point_work); + + etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]); + event_free(gpu, gpu->sync_point_event); +} + /* * Init/Cleanup: */ @@ -1440,7 +1548,15 @@ static irqreturn_t irq_handler(int irq, void *data) dev_dbg(gpu->dev, "event %u\n", event); + if (gpu->event[event].sync_point) { + gpu->sync_point_event = event; + etnaviv_queue_work(gpu->drm, &gpu->sync_point_work); + } + fence = gpu->event[event].fence; + if (!fence) + continue; + gpu->event[event].fence = NULL; dma_fence_signal(fence); @@ -1645,6 +1761,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, INIT_LIST_HEAD(&gpu->active_cmd_list); INIT_WORK(&gpu->retire_work, retire_worker); + INIT_WORK(&gpu->sync_point_work, sync_point_worker); INIT_WORK(&gpu->recover_work, recover_worker); init_waitqueue_head(&gpu->fence_event); |