Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r--  drivers/gpu/drm/vc4/Makefile         |   1
-rw-r--r--  drivers/gpu/drm/vc4/vc4_crtc.c       |  25
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.c        |  26
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.h        | 128
-rw-r--r--  drivers/gpu/drm/vc4/vc4_gem.c        |  48
-rw-r--r--  drivers/gpu/drm/vc4/vc4_hdmi.c       |   2
-rw-r--r--  drivers/gpu/drm/vc4/vc4_irq.c        |  40
-rw-r--r--  drivers/gpu/drm/vc4/vc4_kms.c        |   1
-rw-r--r--  drivers/gpu/drm/vc4/vc4_perfmon.c    | 188
-rw-r--r--  drivers/gpu/drm/vc4/vc4_plane.c      | 125
-rw-r--r--  drivers/gpu/drm/vc4/vc4_regs.h       |  36
-rw-r--r--  drivers/gpu/drm/vc4/vc4_v3d.c        |  64
-rw-r--r--  drivers/gpu/drm/vc4/vc4_validate.c   |   2
13 files changed, 544 insertions, 142 deletions
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index f5500df51686..4a3a868235f8 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -15,6 +15,7 @@ vc4-y := \ vc4_vec.o \ vc4_hvs.o \ vc4_irq.o \ + vc4_perfmon.o \ vc4_plane.o \ vc4_render_cl.o \ vc4_trace_points.o \ diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index ce1e3b9e14c9..bf4667481935 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -643,9 +643,12 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, { struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); struct drm_plane *plane; + struct vc4_plane_state *vc4_plane_state; bool debug_dump_regs = false; + bool enable_bg_fill = false; u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; u32 __iomem *dlist_next = dlist_start; @@ -656,6 +659,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, /* Copy all the active planes' dlist contents to the hardware dlist. */ drm_atomic_crtc_for_each_plane(plane, crtc) { + /* Is this the first active plane? */ + if (dlist_next == dlist_start) { + /* We need to enable background fill when a plane + * could be alpha blending from the background, i.e. + * where no other plane is underneath. It suffices to + * consider the first active plane here since we set + * needs_bg_fill such that either the first plane + * already needs it or all planes on top blend from + * the first or a lower plane. + */ + vc4_plane_state = to_vc4_plane_state(plane->state); + enable_bg_fill = vc4_plane_state->needs_bg_fill; + } + dlist_next += vc4_plane_write_dlist(plane, dlist_next); } @@ -664,6 +681,14 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); + if (enable_bg_fill) + /* This sets a black background color fill, as is the case + * with other DRM drivers. + */ + HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), + HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)) | + SCALER_DISPBKGND_FILL); + /* Only update DISPLIST if the CRTC was already running and is not * being disabled. 
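[Editor's note] The background-fill decision above only has to look at the bottom-most active plane, because needs_bg_fill is computed in vc4_plane_mode_set() so that it is already true for the first plane whenever any plane in the stack could end up blending from the background. A minimal sketch of that invariant, using a hypothetical helper that is not part of the patch:

/* Hypothetical helper; assumes planes[] holds the CRTC's active planes
 * ordered bottom to top. Planes above the first one either blend from
 * it or from another plane below them, so they never need the fill.
 */
static bool crtc_needs_bg_fill(struct drm_plane_state **planes,
			       unsigned int nplanes)
{
	return nplanes > 0 &&
	       to_vc4_plane_state(planes[0])->needs_bg_fill;
}
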
* vc4_crtc_enable() takes care of updating the dlist just after diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ceb385fd69c5..94b99c90425a 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -101,6 +101,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: case DRM_VC4_PARAM_SUPPORTS_MADVISE: + case DRM_VC4_PARAM_SUPPORTS_PERFMON: args->value = true; break; default: @@ -111,6 +112,26 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, return 0; } +static int vc4_open(struct drm_device *dev, struct drm_file *file) +{ + struct vc4_file *vc4file; + + vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL); + if (!vc4file) + return -ENOMEM; + + vc4_perfmon_open_file(vc4file); + file->driver_priv = vc4file; + return 0; +} + +static void vc4_close(struct drm_device *dev, struct drm_file *file) +{ + struct vc4_file *vc4file = file->driver_priv; + + vc4_perfmon_close_file(vc4file); +} + static const struct vm_operations_struct vc4_vm_ops = { .fault = vc4_fault, .open = drm_gem_vm_open, @@ -143,6 +164,9 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -153,6 +177,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_RENDER | DRIVER_PRIME), .lastclose = drm_fb_helper_lastclose, + .open = vc4_open, + .postclose = vc4_close, .irq_handler = vc4_irq, .irq_preinstall = vc4_irq_preinstall, .irq_postinstall = vc4_irq_postinstall, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 3af22936d9b3..1b4cd1fabf56 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -11,6 +11,8 @@ #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include "uapi/drm/vc4_drm.h" + /* Don't forget to update vc4_bo.c: bo_type_names[] when adding to * this. */ @@ -29,6 +31,36 @@ enum vc4_kernel_bo_type { VC4_BO_TYPE_COUNT }; +/* Performance monitor object. The perform lifetime is controlled by userspace + * using perfmon related ioctls. A perfmon can be attached to a submit_cl + * request, and when this is the case, HW perf counters will be activated just + * before the submit_cl is submitted to the GPU and disabled when the job is + * done. This way, only events related to a specific job will be counted. + */ +struct vc4_perfmon { + /* Tracks the number of users of the perfmon, when this counter reaches + * zero the perfmon is destroyed. + */ + refcount_t refcnt; + + /* Number of counters activated in this perfmon instance + * (should be less than DRM_VC4_MAX_PERF_COUNTERS). + */ + u8 ncounters; + + /* Events counted by the HW perf counters. */ + u8 events[DRM_VC4_MAX_PERF_COUNTERS]; + + /* Storage for counter values. Counters are incremented by the HW + * perf counter values every time the perfmon is attached to a GPU job. 
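[Editor's note] On the userspace side, the DRM_VC4_PARAM_SUPPORTS_PERFMON case added to vc4_get_param_ioctl() above lets clients probe for the feature before using any of the new perfmon ioctls. A minimal sketch (the wrapper name vc4_has_perfmon is illustrative, and a uapi vc4_drm.h that already carries the new definitions is assumed):

#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>	/* DRM_IOCTL_VC4_GET_PARAM and the new param */

/* Returns non-zero when the running kernel advertises perfmon support;
 * fd is an open vc4 render node.
 */
static int vc4_has_perfmon(int fd)
{
	struct drm_vc4_get_param p;

	memset(&p, 0, sizeof(p));
	p.param = DRM_VC4_PARAM_SUPPORTS_PERFMON;
	if (ioctl(fd, DRM_IOCTL_VC4_GET_PARAM, &p))
		return 0;
	return p.value != 0;
}
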
+ * This way, perfmon users don't have to retrieve the results after + * each job if they want to track events covering several submissions. + * Note that counter values can't be reset, but you can fake a reset by + * destroying the perfmon and creating a new one. + */ + u64 counters[0]; +}; + struct vc4_dev { struct drm_device *dev; @@ -121,6 +153,11 @@ struct vc4_dev { wait_queue_head_t job_wait_queue; struct work_struct job_done_work; + /* Used to track the active perfmon if any. Access to this field is + * protected by job_lock. + */ + struct vc4_perfmon *active_perfmon; + /* List of struct vc4_seqno_cb for callbacks to be made from a * workqueue when the given seqno is passed. */ @@ -273,6 +310,66 @@ to_vc4_plane(struct drm_plane *plane) return (struct vc4_plane *)plane; } +enum vc4_scaling_mode { + VC4_SCALING_NONE, + VC4_SCALING_TPZ, + VC4_SCALING_PPF, +}; + +struct vc4_plane_state { + struct drm_plane_state base; + /* System memory copy of the display list for this element, computed + * at atomic_check time. + */ + u32 *dlist; + u32 dlist_size; /* Number of dwords allocated for the display list */ + u32 dlist_count; /* Number of used dwords in the display list. */ + + /* Offset in the dlist to various words, for pageflip or + * cursor updates. + */ + u32 pos0_offset; + u32 pos2_offset; + u32 ptr0_offset; + + /* Offset where the plane's dlist was last stored in the + * hardware at vc4_crtc_atomic_flush() time. + */ + u32 __iomem *hw_dlist; + + /* Clipped coordinates of the plane on the display. */ + int crtc_x, crtc_y, crtc_w, crtc_h; + /* Clipped area being scanned from in the FB. */ + u32 src_x, src_y; + + u32 src_w[2], src_h[2]; + + /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ + enum vc4_scaling_mode x_scaling[2], y_scaling[2]; + bool is_unity; + bool is_yuv; + + /* Offset to start scanning out from the start of the plane's + * BO. + */ + u32 offsets[3]; + + /* Our allocation in LBM for temporary storage during scaling. */ + struct drm_mm_node lbm; + + /* Set when the plane has per-pixel alpha content or does not cover + * the entire screen. This is a hint to the CRTC that it might need + * to enable background color fill. + */ + bool needs_bg_fill; +}; + +static inline struct vc4_plane_state * +to_vc4_plane_state(struct drm_plane_state *state) +{ + return (struct vc4_plane_state *)state; +} + enum vc4_encoder_type { VC4_ENCODER_TYPE_NONE, VC4_ENCODER_TYPE_HDMI, @@ -406,6 +503,21 @@ struct vc4_exec_info { void *uniforms_v; uint32_t uniforms_p; uint32_t uniforms_size; + + /* Pointer to a performance monitor object if the user requested it, + * NULL otherwise. + */ + struct vc4_perfmon *perfmon; +}; + +/* Per-open file private data. Any driver-specific resource that has to be + * released when the DRM file is closed should be placed here. 
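[Editor's note] From userspace, an object matching this layout is created with the new PERFMON_CREATE ioctl; the kernel writes back an ID that is local to the DRM file (tracked in the per-file idr introduced just below). A sketch, assuming the patched uapi header; the wrapper name is illustrative and the two event indices are placeholders for VC4_PERFCNT_* values:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>	/* DRM_IOCTL_VC4_PERFMON_CREATE */

static int vc4_perfmon_create(int fd, uint32_t *id)
{
	struct drm_vc4_perfmon_create req;

	memset(&req, 0, sizeof(req));
	req.ncounters = 2;	/* 1..DRM_VC4_MAX_PERF_COUNTERS */
	req.events[0] = 0;	/* placeholder event indices; use the   */
	req.events[1] = 1;	/* VC4_PERFCNT_* values from the header  */
	if (ioctl(fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req))
		return -1;
	*id = req.id;		/* written back by the kernel */
	return 0;
}
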
+ */ +struct vc4_file { + struct { + struct idr idr; + struct mutex lock; + } perfmon; }; static inline struct vc4_exec_info * @@ -646,3 +758,19 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec, /* vc4_validate_shader.c */ struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj); + +/* vc4_perfmon.c */ +void vc4_perfmon_get(struct vc4_perfmon *perfmon); +void vc4_perfmon_put(struct vc4_perfmon *perfmon); +void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon); +void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, + bool capture); +struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id); +void vc4_perfmon_open_file(struct vc4_file *vc4file); +void vc4_perfmon_close_file(struct vc4_file *vc4file); +int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index c94cce96544c..2107b0daf8ef 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -467,14 +467,30 @@ again: vc4_flush_caches(dev); + /* Only start the perfmon if it was not already started by a previous + * job. + */ + if (exec->perfmon && vc4->active_perfmon != exec->perfmon) + vc4_perfmon_start(vc4, exec->perfmon); + /* Either put the job in the binner if it uses the binner, or * immediately move it to the to-be-rendered queue. */ if (exec->ct0ca != exec->ct0ea) { submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); } else { + struct vc4_exec_info *next; + vc4_move_job_to_render(dev, exec); - goto again; + next = vc4_first_bin_job(vc4); + + /* We can't start the next bin job if the previous job had a + * different perfmon instance attached to it. The same goes + * if one of them had a perfmon attached to it and the other + * one doesn't. + */ + if (next && next->perfmon == exec->perfmon) + goto again; } } @@ -642,6 +658,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *renderjob; uint64_t seqno; unsigned long irqflags; struct vc4_fence *fence; @@ -667,11 +684,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, list_add_tail(&exec->head, &vc4->bin_job_list); - /* If no job was executing, kick ours off. Otherwise, it'll - * get started when the previous job's flush done interrupt - * occurs. + /* If no bin job was executing and if the render job (if any) has the + * same perfmon as our job attached to it (or if both jobs don't have + * perfmon activated), then kick ours off. Otherwise, it'll get + * started when the previous job's flush/render done interrupt occurs. */ - if (vc4_first_bin_job(vc4) == exec) { + renderjob = vc4_first_render_job(vc4); + if (vc4_first_bin_job(vc4) == exec && + (!renderjob || renderjob->perfmon == exec->perfmon)) { vc4_submit_next_bin_job(dev); vc4_queue_hangcheck(dev); } @@ -936,6 +956,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) vc4->bin_alloc_used &= ~exec->bin_slots; spin_unlock_irqrestore(&vc4->job_lock, irqflags); + /* Release the reference we had on the perf monitor. 
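[Editor's note] The submission constraint introduced in vc4_gem.c is symmetric: since there is only one set of hardware counters, a job may only start if the job currently occupying the other pipeline stage uses the same perfmon (which includes both having none). A sketch of that predicate, as a hypothetical helper restating the checks rather than code from the patch:

/* "other" is the job on the other pipeline stage, or NULL if that
 * stage is idle. Comparing the pointers also covers the case where
 * neither job has a perfmon attached.
 */
static bool vc4_jobs_share_perfmon(struct vc4_exec_info *job,
				   struct vc4_exec_info *other)
{
	return !other || other->perfmon == job->perfmon;
}
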
*/ + vc4_perfmon_put(exec->perfmon); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -1088,6 +1111,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; @@ -1101,6 +1125,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + if (args->pad2 != 0) { + DRM_DEBUG("->pad2 must be set to zero\n"); + return -EINVAL; + } + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1126,6 +1155,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->perfmonid) { + exec->perfmon = vc4_perfmon_find(vc4file, + args->perfmonid); + if (!exec->perfmon) { + ret = -ENOENT; + goto fail; + } + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 984501e3f0b0..1a6db291d48b 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -681,7 +681,7 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) drift & ~VC4_HDMI_FIFO_CTL_RECENTER); HDMI_WRITE(VC4_HDMI_FIFO_CTL, drift | VC4_HDMI_FIFO_CTL_RECENTER); - udelay(1000); + usleep_range(1000, 1100); HDMI_WRITE(VC4_HDMI_FIFO_CTL, drift & ~VC4_HDMI_FIFO_CTL_RECENTER); HDMI_WRITE(VC4_HDMI_FIFO_CTL, diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 3dd62d75f531..4cd2ccfe15f4 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -104,13 +104,20 @@ static void vc4_irq_finish_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4); if (!exec) return; vc4_move_job_to_render(dev, exec); - vc4_submit_next_bin_job(dev); + next = vc4_first_bin_job(vc4); + + /* Only submit the next job in the bin list if it matches the perfmon + * attached to the one that just finished (or if both jobs don't have + * perfmon attached to them). + */ + if (next && next->perfmon == exec->perfmon) + vc4_submit_next_bin_job(dev); } static void @@ -122,6 +129,10 @@ vc4_cancel_bin_job(struct drm_device *dev) if (!exec) return; + /* Stop the perfmon so that the next bin job can be started. */ + if (exec->perfmon) + vc4_perfmon_stop(vc4, exec->perfmon, false); + list_move_tail(&exec->head, &vc4->bin_job_list); vc4_submit_next_bin_job(dev); } @@ -131,18 +142,41 @@ vc4_irq_finish_render_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec = vc4_first_render_job(vc4); + struct vc4_exec_info *nextbin, *nextrender; if (!exec) return; vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); + + nextbin = vc4_first_bin_job(vc4); + nextrender = vc4_first_render_job(vc4); + + /* Only stop the perfmon if following jobs in the queue don't expect it + * to be enabled. + */ + if (exec->perfmon && !nextrender && + (!nextbin || nextbin->perfmon != exec->perfmon)) + vc4_perfmon_stop(vc4, exec->perfmon, true); + + /* If there's a render job waiting, start it. 
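[Editor's note] Userspace opts in per job by filling the new perfmonid field of drm_vc4_submit_cl, with pad2 zeroed as validated above; leaving perfmonid at zero keeps the old behaviour. A sketch with the rest of the submission arguments elided (the wrapper name is illustrative):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>	/* DRM_IOCTL_VC4_SUBMIT_CL */

/* Submit an already prepared job with a perfmon attached; perfmon_id
 * is the ID returned by DRM_IOCTL_VC4_PERFMON_CREATE, 0 means none.
 * -ENOENT from the kernel means the ID was not found for this file.
 */
static int vc4_submit_with_perfmon(int fd, struct drm_vc4_submit_cl *args,
				   uint32_t perfmon_id)
{
	args->perfmonid = perfmon_id;
	args->pad2 = 0;		/* must be zero */
	return ioctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, args);
}
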
If this is not the case + * we may have to unblock the binner if it's been stalled because of + * perfmon (this can be checked by comparing the perfmon attached to + * the finished renderjob to the one attached to the next bin job: if + * they don't match, this means the binner is stalled and should be + * restarted). + */ + if (nextrender) + vc4_submit_next_render_job(dev); + else if (nextbin && nextbin->perfmon != exec->perfmon) + vc4_submit_next_bin_job(dev); + if (exec->fence) { dma_fence_signal_locked(exec->fence); dma_fence_put(exec->fence); exec->fence = NULL; } - vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); schedule_work(&vc4->job_done_work); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 4256f294c346..ba60153dddb5 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -215,6 +215,7 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.funcs = &vc4_mode_funcs; dev->mode_config.preferred_depth = 24; dev->mode_config.async_page_flip = true; + dev->mode_config.allow_fb_modifiers = true; drm_mode_config_reset(dev); diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c new file mode 100644 index 000000000000..437e7a27f21d --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Broadcom + */ + +/** + * DOC: VC4 V3D performance monitor module + * + * The V3D block provides 16 hardware counters which can count various events. + */ + +#include "vc4_drv.h" +#include "vc4_regs.h" + +#define VC4_PERFMONID_MIN 1 +#define VC4_PERFMONID_MAX U32_MAX + +void vc4_perfmon_get(struct vc4_perfmon *perfmon) +{ + if (perfmon) + refcount_inc(&perfmon->refcnt); +} + +void vc4_perfmon_put(struct vc4_perfmon *perfmon) +{ + if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) + kfree(perfmon); +} + +void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon) +{ + unsigned int i; + u32 mask; + + if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon)) + return; + + for (i = 0; i < perfmon->ncounters; i++) + V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]); + + mask = GENMASK(perfmon->ncounters - 1, 0); + V3D_WRITE(V3D_PCTRC, mask); + V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask); + vc4->active_perfmon = perfmon; +} + +void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, + bool capture) +{ + unsigned int i; + + if (WARN_ON_ONCE(!vc4->active_perfmon || + perfmon != vc4->active_perfmon)) + return; + + if (capture) { + for (i = 0; i < perfmon->ncounters; i++) + perfmon->counters[i] += V3D_READ(V3D_PCTR(i)); + } + + V3D_WRITE(V3D_PCTRE, 0); + vc4->active_perfmon = NULL; +} + +struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id) +{ + struct vc4_perfmon *perfmon; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_find(&vc4file->perfmon.idr, id); + vc4_perfmon_get(perfmon); + mutex_unlock(&vc4file->perfmon.lock); + + return perfmon; +} + +void vc4_perfmon_open_file(struct vc4_file *vc4file) +{ + mutex_init(&vc4file->perfmon.lock); + idr_init(&vc4file->perfmon.idr); +} + +static int vc4_perfmon_idr_del(int id, void *elem, void *data) +{ + struct vc4_perfmon *perfmon = elem; + + vc4_perfmon_put(perfmon); + + return 0; +} + +void vc4_perfmon_close_file(struct vc4_file *vc4file) +{ + mutex_lock(&vc4file->perfmon.lock); + idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); + idr_destroy(&vc4file->perfmon.idr); + mutex_unlock(&vc4file->perfmon.lock); +} + +int 
vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_create *req = data; + struct vc4_perfmon *perfmon; + unsigned int i; + int ret; + + /* Number of monitored counters cannot exceed HW limits. */ + if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS || + !req->ncounters) + return -EINVAL; + + /* Make sure all events are valid. */ + for (i = 0; i < req->ncounters; i++) { + if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS) + return -EINVAL; + } + + perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)), + GFP_KERNEL); + if (!perfmon) + return -ENOMEM; + + for (i = 0; i < req->ncounters; i++) + perfmon->events[i] = req->events[i]; + + perfmon->ncounters = req->ncounters; + + refcount_set(&perfmon->refcnt, 1); + + mutex_lock(&vc4file->perfmon.lock); + ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN, + VC4_PERFMONID_MAX, GFP_KERNEL); + mutex_unlock(&vc4file->perfmon.lock); + + if (ret < 0) { + kfree(perfmon); + return ret; + } + + req->id = ret; + return 0; +} + +int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_destroy *req = data; + struct vc4_perfmon *perfmon; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_remove(&vc4file->perfmon.idr, req->id); + mutex_unlock(&vc4file->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + vc4_perfmon_put(perfmon); + return 0; +} + +int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_get_values *req = data; + struct vc4_perfmon *perfmon; + int ret; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_find(&vc4file->perfmon.idr, req->id); + vc4_perfmon_get(perfmon); + mutex_unlock(&vc4file->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters, + perfmon->ncounters * sizeof(u64))) + ret = -EFAULT; + else + ret = 0; + + vc4_perfmon_put(perfmon); + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 515f97997624..ce39390be389 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -27,105 +27,50 @@ #include "vc4_drv.h" #include "vc4_regs.h" -enum vc4_scaling_mode { - VC4_SCALING_NONE, - VC4_SCALING_TPZ, - VC4_SCALING_PPF, -}; - -struct vc4_plane_state { - struct drm_plane_state base; - /* System memory copy of the display list for this element, computed - * at atomic_check time. - */ - u32 *dlist; - u32 dlist_size; /* Number of dwords allocated for the display list */ - u32 dlist_count; /* Number of used dwords in the display list. */ - - /* Offset in the dlist to various words, for pageflip or - * cursor updates. - */ - u32 pos0_offset; - u32 pos2_offset; - u32 ptr0_offset; - - /* Offset where the plane's dlist was last stored in the - * hardware at vc4_crtc_atomic_flush() time. - */ - u32 __iomem *hw_dlist; - - /* Clipped coordinates of the plane on the display. */ - int crtc_x, crtc_y, crtc_w, crtc_h; - /* Clipped area being scanned from in the FB. */ - u32 src_x, src_y; - - u32 src_w[2], src_h[2]; - - /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. 
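[Editor's note] Reading results back mirrors vc4_perfmon_get_values_ioctl() above: the caller supplies a buffer with one 64-bit slot per monitored counter and the accumulated values are copied out. A minimal userspace sketch (illustrative wrapper name, patched uapi header assumed):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/vc4_drm.h>	/* DRM_IOCTL_VC4_PERFMON_GET_VALUES */

/* "values" must have room for as many u64s as the perfmon has counters. */
static int vc4_perfmon_read(int fd, uint32_t id, uint64_t *values)
{
	struct drm_vc4_perfmon_get_values req;

	memset(&req, 0, sizeof(req));
	req.id = id;
	req.values_ptr = (uintptr_t)values;
	return ioctl(fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
}
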
*/ - enum vc4_scaling_mode x_scaling[2], y_scaling[2]; - bool is_unity; - bool is_yuv; - - /* Offset to start scanning out from the start of the plane's - * BO. - */ - u32 offsets[3]; - - /* Our allocation in LBM for temporary storage during scaling. */ - struct drm_mm_node lbm; -}; - -static inline struct vc4_plane_state * -to_vc4_plane_state(struct drm_plane_state *state) -{ - return (struct vc4_plane_state *)state; -} - static const struct hvs_format { u32 drm; /* DRM_FORMAT_* */ u32 hvs; /* HVS_FORMAT_* */ u32 pixel_order; - bool has_alpha; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ARGB, }, { .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ARGB, }, { .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, - .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XRGB, }, { .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565, - .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XBGR, }, { .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_RGB888, .hvs = HVS_PIXEL_FORMAT_RGB888, - .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XRGB, }, { .drm = DRM_FORMAT_BGR888, .hvs = HVS_PIXEL_FORMAT_RGB888, - .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XBGR, }, { .drm = DRM_FORMAT_YUV422, @@ -522,6 +467,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); + bool covers_screen; u32 scl0, scl1, pitch0; u32 lbm_size, tiling; unsigned long irqflags; @@ -619,13 +565,14 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS1_SCL_HEIGHT)); } - /* Position Word 2: Source Image Size, Alpha Mode */ + /* Position Word 2: Source Image Size, Alpha */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(format->has_alpha ? + VC4_SET_FIELD(fb->format->has_alpha ? SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | + (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) | VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); @@ -701,6 +648,16 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->dlist[ctl0_offset] |= VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); + /* crtc_* are already clipped coordinates. 
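[Editor's note] The alpha handling now keys off the generic drm_format_info instead of the duplicated has_alpha flag that was removed from hvs_formats: formats with an alpha channel get per-pixel ("pipeline") alpha and are marked premultiplied, opaque formats keep fixed alpha. A sketch of the resulting Position Word 2 alpha bits, using the macros from vc4_regs.h (the helper name is illustrative):

/* Sketch only: same selection as the vc4_dlist_write() call above. */
static u32 vc4_pos2_alpha_bits(const struct drm_format_info *format)
{
	u32 mode = format->has_alpha ? SCALER_POS2_ALPHA_MODE_PIPELINE :
				       SCALER_POS2_ALPHA_MODE_FIXED;

	return VC4_SET_FIELD(mode, SCALER_POS2_ALPHA_MODE) |
	       (format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0);
}
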
*/ + covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && + vc4_state->crtc_w == state->crtc->mode.hdisplay && + vc4_state->crtc_h == state->crtc->mode.vdisplay; + /* Background fill might be necessary when the plane has per-pixel + * alpha content and blends from the background or does not cover + * the entire screen. + */ + vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen; + return 0; } @@ -907,6 +864,32 @@ out: ctx); } +static bool vc4_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + /* Support T_TILING for RGB formats only. */ + switch (format) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB1555: + return true; + case DRM_FORMAT_YUV422: + case DRM_FORMAT_YVU422: + case DRM_FORMAT_YUV420: + case DRM_FORMAT_YVU420: + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV16: + default: + return (modifier == DRM_FORMAT_MOD_LINEAR); + } +} + static const struct drm_plane_funcs vc4_plane_funcs = { .update_plane = vc4_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -915,6 +898,7 @@ static const struct drm_plane_funcs vc4_plane_funcs = { .reset = vc4_plane_reset, .atomic_duplicate_state = vc4_plane_duplicate_state, .atomic_destroy_state = vc4_plane_destroy_state, + .format_mod_supported = vc4_format_mod_supported, }; struct drm_plane *vc4_plane_init(struct drm_device *dev, @@ -926,6 +910,11 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, u32 num_formats = 0; int ret = 0; unsigned i; + static const uint64_t modifiers[] = { + DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID + }; vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane), GFP_KERNEL); @@ -946,7 +935,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, 0, &vc4_plane_funcs, formats, num_formats, - NULL, type, NULL); + modifiers, type, NULL); drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 55677bd50f66..a141496104a6 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -122,38 +122,9 @@ #define V3D_VPMBASE 0x00504 #define V3D_PCTRC 0x00670 #define V3D_PCTRE 0x00674 -#define V3D_PCTR0 0x00680 -#define V3D_PCTRS0 0x00684 -#define V3D_PCTR1 0x00688 -#define V3D_PCTRS1 0x0068c -#define V3D_PCTR2 0x00690 -#define V3D_PCTRS2 0x00694 -#define V3D_PCTR3 0x00698 -#define V3D_PCTRS3 0x0069c -#define V3D_PCTR4 0x006a0 -#define V3D_PCTRS4 0x006a4 -#define V3D_PCTR5 0x006a8 -#define V3D_PCTRS5 0x006ac -#define V3D_PCTR6 0x006b0 -#define V3D_PCTRS6 0x006b4 -#define V3D_PCTR7 0x006b8 -#define V3D_PCTRS7 0x006bc -#define V3D_PCTR8 0x006c0 -#define V3D_PCTRS8 0x006c4 -#define V3D_PCTR9 0x006c8 -#define V3D_PCTRS9 0x006cc -#define V3D_PCTR10 0x006d0 -#define V3D_PCTRS10 0x006d4 -#define V3D_PCTR11 0x006d8 -#define V3D_PCTRS11 0x006dc -#define V3D_PCTR12 0x006e0 -#define V3D_PCTRS12 0x006e4 -#define V3D_PCTR13 0x006e8 -#define V3D_PCTRS13 0x006ec -#define V3D_PCTR14 0x006f0 -#define V3D_PCTRS14 0x006f4 -#define V3D_PCTR15 0x006f8 -#define V3D_PCTRS15 0x006fc +# define V3D_PCTRE_EN BIT(31) +#define V3D_PCTR(x) (0x00680 + ((x) * 8)) +#define V3D_PCTRS(x) (0x00684 + ((x) * 8)) #define V3D_DBGE 0x00f00 #define V3D_FDBGO 0x00f04 #define V3D_FDBGB 0x00f08 @@ -877,6 +848,7 @@ enum hvs_pixel_format { 
#define SCALER_POS2_ALPHA_MODE_FIXED 1 #define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2 #define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3 +#define SCALER_POS2_ALPHA_PREMULT BIT(29) #define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16) #define SCALER_POS2_HEIGHT_SHIFT 16 diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 493f392b3a0a..bfc2fa73d2ae 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -68,38 +68,38 @@ static const struct { REGDEF(V3D_VPMBASE), REGDEF(V3D_PCTRC), REGDEF(V3D_PCTRE), - REGDEF(V3D_PCTR0), - REGDEF(V3D_PCTRS0), - REGDEF(V3D_PCTR1), - REGDEF(V3D_PCTRS1), - REGDEF(V3D_PCTR2), - REGDEF(V3D_PCTRS2), - REGDEF(V3D_PCTR3), - REGDEF(V3D_PCTRS3), - REGDEF(V3D_PCTR4), - REGDEF(V3D_PCTRS4), - REGDEF(V3D_PCTR5), - REGDEF(V3D_PCTRS5), - REGDEF(V3D_PCTR6), - REGDEF(V3D_PCTRS6), - REGDEF(V3D_PCTR7), - REGDEF(V3D_PCTRS7), - REGDEF(V3D_PCTR8), - REGDEF(V3D_PCTRS8), - REGDEF(V3D_PCTR9), - REGDEF(V3D_PCTRS9), - REGDEF(V3D_PCTR10), - REGDEF(V3D_PCTRS10), - REGDEF(V3D_PCTR11), - REGDEF(V3D_PCTRS11), - REGDEF(V3D_PCTR12), - REGDEF(V3D_PCTRS12), - REGDEF(V3D_PCTR13), - REGDEF(V3D_PCTRS13), - REGDEF(V3D_PCTR14), - REGDEF(V3D_PCTRS14), - REGDEF(V3D_PCTR15), - REGDEF(V3D_PCTRS15), + REGDEF(V3D_PCTR(0)), + REGDEF(V3D_PCTRS(0)), + REGDEF(V3D_PCTR(1)), + REGDEF(V3D_PCTRS(1)), + REGDEF(V3D_PCTR(2)), + REGDEF(V3D_PCTRS(2)), + REGDEF(V3D_PCTR(3)), + REGDEF(V3D_PCTRS(3)), + REGDEF(V3D_PCTR(4)), + REGDEF(V3D_PCTRS(4)), + REGDEF(V3D_PCTR(5)), + REGDEF(V3D_PCTRS(5)), + REGDEF(V3D_PCTR(6)), + REGDEF(V3D_PCTRS(6)), + REGDEF(V3D_PCTR(7)), + REGDEF(V3D_PCTRS(7)), + REGDEF(V3D_PCTR(8)), + REGDEF(V3D_PCTRS(8)), + REGDEF(V3D_PCTR(9)), + REGDEF(V3D_PCTRS(9)), + REGDEF(V3D_PCTR(10)), + REGDEF(V3D_PCTRS(10)), + REGDEF(V3D_PCTR(11)), + REGDEF(V3D_PCTRS(11)), + REGDEF(V3D_PCTR(12)), + REGDEF(V3D_PCTRS(12)), + REGDEF(V3D_PCTR(13)), + REGDEF(V3D_PCTRS(13)), + REGDEF(V3D_PCTR(14)), + REGDEF(V3D_PCTRS(14)), + REGDEF(V3D_PCTR(15)), + REGDEF(V3D_PCTRS(15)), REGDEF(V3D_DBGE), REGDEF(V3D_FDBGO), REGDEF(V3D_FDBGB), diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 2db485abb186..eec76af49f04 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -753,7 +753,7 @@ validate_gl_shader_rec(struct drm_device *dev, 28, /* cs */ }; uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets); - struct drm_gem_cma_object *bo[shader_reloc_count + 8]; + struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8]; uint32_t nr_attributes, nr_relocs, packet_size; int i; |
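[Editor's note] The per-counter register defines collapse into two parameterized macros with an 8-byte stride per counter pair, which is what lets vc4_perfmon_start() and the REGDEF table above index them by loop variable. A quick compile-time sanity check, as a standalone sketch:

#define V3D_PCTR(x)	(0x00680 + ((x) * 8))
#define V3D_PCTRS(x)	(0x00684 + ((x) * 8))

/* The parameterized forms reproduce the old fixed offsets. */
_Static_assert(V3D_PCTR(0) == 0x00680, "old V3D_PCTR0");
_Static_assert(V3D_PCTRS(0) == 0x00684, "old V3D_PCTRS0");
_Static_assert(V3D_PCTR(15) == 0x006f8, "old V3D_PCTR15");
_Static_assert(V3D_PCTRS(15) == 0x006fc, "old V3D_PCTRS15");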