summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 20:46:16 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 20:46:16 +0300
commit93b694d096cc10994c817730d4d50288f9ae3d66 (patch)
tree5bd967686d0003f7dbbe1da49f5399cb4a92f074 /drivers/gpu/drm/i915/gt/gen6_ppgtt.c
parent726eb70e0d34dc4bc4dada71f52bba8ed638431e (diff)
parent640eee067d9aae0bb98d8706001976ff1affaf00 (diff)
downloadlinux-93b694d096cc10994c817730d4d50288f9ae3d66.tar.xz
Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Not a major amount of change, the i915 trees got split into display and gt trees to better facilitate higher level review, and there's a major refactoring of i915 GEM locking to use more core kernel concepts (like ww-mutexes). msm gets per-process pagetables, older AMD SI cards get DC support, nouveau got a bump in displayport support with common code extraction from i915. Outside of drm this contains a couple of patches for hexint moduleparams which you've acked, and a virtio common code tree that you should also get via it's regular path. New driver: - Cadence MHDP8546 DisplayPort bridge driver core: - cross-driver scatterlist cleanups - devm_drm conversions - remove drm_dev_init - devm_drm_dev_alloc conversion ttm: - lots of refactoring and cleanups bridges: - chained bridge support in more drivers panel: - misc new panels scheduler: - cleanup priority levels displayport: - refactor i915 code into helpers for nouveau i915: - split into display and GT trees - WW locking refactoring in GEM - execbuf2 extension mechanism - syncobj timeline support - GEN 12 HOBL display powersaving - Rocket Lake display additions - Disable FBC on Tigerlake - Tigerlake Type-C + DP improvements - Hotplug interrupt refactoring amdgpu: - Sienna Cichlid updates - Navy Flounder updates - DCE6 (SI) support for DC - Plane rotation enabled - TMZ state info ioctl - PCIe DPC recovery support - DC interrupt handling refactor - OLED panel fixes amdkfd: - add SMI events for thermal throttling - SMI interface events ioctl update - process eviction counters radeon: - move to dma_ for allocations - expose sclk via sysfs msm: - DSI support for sm8150/sm8250 - per-process GPU pagetable support - Displayport support mediatek: - move HDMI phy driver to PHY - convert mtk-dpi to bridge API - disable mt2701 tmds tegra: - bridge support exynos: - misc cleanups vc4: - dual display cleanups ast: - cleanups gma500: - conversion to GPIOd API hisilicon: - misc reworks ingenic: - clock handling and format improvements mcde: - DSI support mgag200: - desktop g200 support mxsfb: - i.MX7 + i.MX8M - alpha plane support panfrost: - devfreq support - amlogic SoC support ps8640: - EDID from eDP retrieval tidss: - AM65xx YUV workaround virtio: - virtio-gpu exported resources rcar-du: - R8A7742, R8A774E1 and R8A77961 support - YUV planar format fixes - non-visible plane handling - VSP device reference count fix - Kconfig fix to avoid displaying disabled options in .config" * tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits) drm/ingenic: Fix bad revert drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init drm/amdgpu: Remove warning for virtual_display drm/amdgpu: kfd_initialized can be static drm/amd/pm: setup APU dpm clock table in SMU HW initialization drm/amdgpu: prevent spurious warning drm/amdgpu/swsmu: fix ARC build errors drm/amd/display: Fix OPTC_DATA_FORMAT programming drm/amd/display: Don't allow pstate if no support in blank drm/panfrost: increase readl_relaxed_poll_timeout values MAINTAINERS: Update entry for st7703 driver after the rename Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached" drm/amd/display: HDMI remote sink need mode validation for Linux drm/amd/display: Change to correct unit on audio rate drm/amd/display: Avoid set zero in the requested clk drm/amdgpu: align frag_end to covered address space drm/amdgpu: fix NULL pointer dereference for Renoir drm/vmwgfx: fix regression in thp code due to ttm init refactor. drm/amdgpu/swsmu: add interrupt work handler for smu11 parts drm/amdgpu/swsmu: add interrupt work function ...
Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen6_ppgtt.c')
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c106
1 files changed, 48 insertions, 58 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index cdc0b9c54305..fd0d24d28763 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
const unsigned int pde,
const struct i915_page_table *pt)
{
+ dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
+
/* Caller needs to make sure the write completes if necessary */
- iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+ iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
ppgtt->pd_addr + pde);
}
@@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
{
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
- const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+ const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
unsigned int pde = first_entry / GEN6_PTES;
unsigned int pte = first_entry % GEN6_PTES;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
@@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
- GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
num_entries -= count;
GEM_BUG_ON(count > atomic_read(&pt->used));
@@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
- GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+ GEM_BUG_ON(!pd->entry[act_pt]);
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
@@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
mutex_unlock(&ppgtt->flush);
}
-static int gen6_alloc_va_range(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen6_alloc_va_range(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ u64 start, u64 length)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_table *pt, *alloc = NULL;
+ struct i915_page_table *pt;
bool flush = false;
u64 from = start;
unsigned int pde;
- int ret = 0;
spin_lock(&pd->lock);
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
- if (px_base(pt) == px_base(&vm->scratch[1])) {
+ if (!pt) {
spin_unlock(&pd->lock);
- pt = fetch_and_zero(&alloc);
- if (!pt)
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto unwind_out;
- }
+ pt = stash->pt[0];
+ __i915_gem_object_pin_pages(pt->base);
+ i915_gem_object_make_unshrinkable(pt->base);
- fill32_px(pt, vm->scratch[0].encode);
+ fill32_px(pt, vm->scratch[0]->encode);
spin_lock(&pd->lock);
- if (pd->entry[pde] == &vm->scratch[1]) {
+ if (!pd->entry[pde]) {
+ stash->pt[0] = pt->stash;
+ atomic_set(&pt->used, 0);
pd->entry[pde] = pt;
} else {
- alloc = pt;
pt = pd->entry[pde];
}
@@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
gen6_flush_pd(ppgtt, from, start);
}
-
- goto out;
-
-unwind_out:
- gen6_ppgtt_clear_range(vm, from, start - from);
-out:
- if (alloc)
- free_px(vm, alloc);
- return ret;
}
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
- struct i915_page_directory * const pd = ppgtt->base.pd;
int ret;
- ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ ret = setup_scratch_page(vm);
if (ret)
return ret;
- vm->scratch[0].encode =
- vm->pte_encode(px_dma(&vm->scratch[0]),
+ vm->scratch[0]->encode =
+ vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, PTE_READ_ONLY);
- if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
- cleanup_scratch_page(vm);
- return -ENOMEM;
+ vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
+ if (IS_ERR(vm->scratch[1]))
+ return PTR_ERR(vm->scratch[1]);
+
+ ret = pin_pt_dma(vm, vm->scratch[1]);
+ if (ret) {
+ i915_gem_object_put(vm->scratch[1]);
+ return ret;
}
- fill32_px(&vm->scratch[1], vm->scratch[0].encode);
- memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+ fill32_px(vm->scratch[1], vm->scratch[0]->encode);
return 0;
}
@@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
u32 pde;
gen6_for_all_pdes(pt, pd, pde)
- if (px_base(pt) != scratch)
- free_px(&ppgtt->base.vm, pt);
+ if (pt)
+ free_pt(&ppgtt->base.vm, pt);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
mutex_destroy(&ppgtt->flush);
mutex_destroy(&ppgtt->pin_mutex);
- kfree(ppgtt->base.pd);
+
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
@@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
-static int pd_vma_bind(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 unused)
+static void pd_vma_bind(struct i915_address_space *vm,
+ struct i915_vm_pt_stash *stash,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct gen6_ppgtt *ppgtt = vma->private;
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
- px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
- return 0;
}
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch =
- px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
unsigned int pde;
@@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
/* Free all no longer used page tables */
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
- if (px_base(pt) == scratch || atomic_read(&pt->used))
+ if (!pt || atomic_read(&pt->used))
continue;
- free_px(&ppgtt->base.vm, pt);
- pd->entry[pde] = scratch;
+ free_pt(&ppgtt->base.vm, pt);
+ pd->entry[pde] = NULL;
}
ppgtt->scan_for_unused_pt = false;
@@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
return vma;
}
-int gen6_ppgtt_pin(struct i915_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
@@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
*/
err = 0;
if (!atomic_read(&ppgtt->pin_count))
- err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
@@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->pin_mutex);
ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
@@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+ ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
- ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ ppgtt->base.pd = __alloc_pd(I915_PDES);
if (!ppgtt->base.pd) {
err = -ENOMEM;
goto err_free;
@@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
err_scratch:
free_scratch(&ppgtt->base.vm);
err_pd:
- kfree(ppgtt->base.pd);
+ free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt);