diff options
author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2018-07-12 10:23:44 +0300 |
---|---|---|
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2018-07-12 10:23:44 +0300 |
commit | 91045034d53eb37b93a3ddd20f25883d9a40b100 (patch) | |
tree | b37c9d6385f3d0041c1602def22d6fb3aded0848 /drivers/gpu/drm/i915/gvt | |
parent | 655250a8d1aa3b18949a72869858d07ceac12799 (diff) | |
parent | 279ce5d117078ee8ea40c40199399889981fd808 (diff) | |
download | linux-91045034d53eb37b93a3ddd20f25883d9a40b100.tar.xz |
Merge tag 'gvt-next-2018-07-11' of https://github.com/intel/gvt-linux into drm-intel-next-queued
gvt-next-2018-07-11
- vGPU huge page support (Changbin)
- BXT display irq warning fix (Colin)
- Handle GVT dependency well (Henry)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180711023353.GU1267@zhen-hp.sh.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gvt')
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gtt.c | 423 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gtt.h | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/gvt.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/handlers.c | 39 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/hypercall.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/kvmgt.c | 126 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/mpt.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gvt/vgpu.c | 1 |
8 files changed, 517 insertions, 94 deletions
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6a8a9868bcfb..156ceeeb7446 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -216,16 +216,22 @@ static struct gtt_type_table_entry gtt_type_table[] = { GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), + /* We take IPS bit as 'PSE' for PTE level. */ GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, - GTT_TYPE_INVALID), + GTT_TYPE_PPGTT_PTE_64K_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, - GTT_TYPE_INVALID), + GTT_TYPE_PPGTT_PTE_64K_ENTRY), + GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, + GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, + GTT_TYPE_INVALID, + GTT_TYPE_PPGTT_PTE_64K_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, GTT_TYPE_PPGTT_PDE_PT, @@ -339,8 +345,14 @@ static inline int gtt_set_entry64(void *pt, #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) +#define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) +#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) +#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ + +#define GTT_64K_PTE_STRIDE 16 + static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { unsigned long pfn; @@ -349,6 +361,8 @@ static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; + else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) + pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; else pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; return pfn; @@ -362,6 +376,9 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { e->val64 &= ~ADDR_2M_MASK; pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); + } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { + e->val64 &= ~ADDR_64K_MASK; + pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); } else { e->val64 &= ~ADDR_4K_MASK; pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); @@ -372,16 +389,41 @@ static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) { - /* Entry doesn't have PSE bit. */ - if (get_pse_type(e->type) == GTT_TYPE_INVALID) - return false; + return !!(e->val64 & _PAGE_PSE); +} - e->type = get_entry_type(e->type); - if (!(e->val64 & _PAGE_PSE)) +static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) +{ + if (gen8_gtt_test_pse(e)) { + switch (e->type) { + case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + e->val64 &= ~_PAGE_PSE; + e->type = GTT_TYPE_PPGTT_PDE_ENTRY; + break; + case GTT_TYPE_PPGTT_PTE_1G_ENTRY: + e->type = GTT_TYPE_PPGTT_PDP_ENTRY; + e->val64 &= ~_PAGE_PSE; + break; + default: + WARN_ON(1); + } + } +} + +static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) +{ + if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) return false; - e->type = get_pse_type(e->type); - return true; + return !!(e->val64 & GEN8_PDE_IPS_64K); +} + +static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) +{ + if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) + return; + + e->val64 &= ~GEN8_PDE_IPS_64K; } static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) @@ -408,6 +450,21 @@ static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) e->val64 |= _PAGE_PRESENT; } +static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) +{ + return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); +} + +static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) +{ + e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; +} + +static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) +{ + e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; +} + /* * Per-platform GMA routines. */ @@ -440,6 +497,12 @@ static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { .set_present = gtt_entry_set_present, .test_present = gen8_gtt_test_present, .test_pse = gen8_gtt_test_pse, + .clear_pse = gen8_gtt_clear_pse, + .clear_ips = gen8_gtt_clear_ips, + .test_ips = gen8_gtt_test_ips, + .clear_64k_splited = gen8_gtt_clear_64k_splited, + .set_64k_splited = gen8_gtt_set_64k_splited, + .test_64k_splited = gen8_gtt_test_64k_splited, .get_pfn = gen8_gtt_get_pfn, .set_pfn = gen8_gtt_set_pfn, }; @@ -453,6 +516,27 @@ static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { .gma_to_pml4_index = gen8_gma_to_pml4_index, }; +/* Update entry type per pse and ips bit. */ +static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops, + struct intel_gvt_gtt_entry *entry, bool ips) +{ + switch (entry->type) { + case GTT_TYPE_PPGTT_PDE_ENTRY: + case GTT_TYPE_PPGTT_PDP_ENTRY: + if (pte_ops->test_pse(entry)) + entry->type = get_pse_type(entry->type); + break; + case GTT_TYPE_PPGTT_PTE_4K_ENTRY: + if (ips) + entry->type = get_pse_type(entry->type); + break; + default: + GEM_BUG_ON(!gtt_type_is_entry(entry->type)); + } + + GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); +} + /* * MM helpers. */ @@ -468,8 +552,7 @@ static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : mm->ppgtt_mm.shadow_pdps, entry, index, false, 0, mm->vgpu); - - pte_ops->test_pse(entry); + update_entry_type_for_real(pte_ops, entry, false); } static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, @@ -574,7 +657,8 @@ static inline int ppgtt_spt_get_entry( if (ret) return ret; - ops->test_pse(e); + update_entry_type_for_real(ops, e, guest ? + spt->guest_page.pde_ips : false); gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", type, e->type, index, e->val64); @@ -653,10 +737,12 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); - if (spt->guest_page.oos_page) - detach_oos_page(spt->vgpu, spt->guest_page.oos_page); + if (spt->guest_page.gfn) { + if (spt->guest_page.oos_page) + detach_oos_page(spt->vgpu, spt->guest_page.oos_page); - intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); + intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); + } list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -717,8 +803,9 @@ static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); +/* Allocate shadow page table without guest page. */ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( - struct intel_vgpu *vgpu, int type, unsigned long gfn) + struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; @@ -753,26 +840,12 @@ retry: spt->shadow_page.vaddr = page_address(spt->shadow_page.page); spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; - /* - * Init guest_page. - */ - spt->guest_page.type = type; - spt->guest_page.gfn = gfn; - - ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, - ppgtt_write_protection_handler, spt); - if (ret) - goto err_unmap_dma; - ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); if (ret) - goto err_unreg_page_track; + goto err_unmap_dma; - trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); return spt; -err_unreg_page_track: - intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn); err_unmap_dma: dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); err_free_spt: @@ -780,6 +853,37 @@ err_free_spt: return ERR_PTR(ret); } +/* Allocate shadow page table associated with specific gfn. */ +static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( + struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type, + unsigned long gfn, bool guest_pde_ips) +{ + struct intel_vgpu_ppgtt_spt *spt; + int ret; + + spt = ppgtt_alloc_spt(vgpu, type); + if (IS_ERR(spt)) + return spt; + + /* + * Init guest_page. + */ + ret = intel_vgpu_register_page_track(vgpu, gfn, + ppgtt_write_protection_handler, spt); + if (ret) { + ppgtt_free_spt(spt); + return ERR_PTR(ret); + } + + spt->guest_page.type = type; + spt->guest_page.gfn = gfn; + spt->guest_page.pde_ips = guest_pde_ips; + + trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); + + return spt; +} + #define pt_entry_size_shift(spt) \ ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) @@ -787,24 +891,38 @@ err_free_spt: (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) #define for_each_present_guest_entry(spt, e, i) \ - for (i = 0; i < pt_entries(spt); i++) \ + for (i = 0; i < pt_entries(spt); \ + i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ if (!ppgtt_get_guest_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) #define for_each_present_shadow_entry(spt, e, i) \ - for (i = 0; i < pt_entries(spt); i++) \ + for (i = 0; i < pt_entries(spt); \ + i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ if (!ppgtt_get_shadow_entry(spt, e, i) && \ spt->vgpu->gvt->gtt.pte_ops->test_present(e)) -static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) +#define for_each_shadow_entry(spt, e, i) \ + for (i = 0; i < pt_entries(spt); \ + i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ + if (!ppgtt_get_shadow_entry(spt, e, i)) + +static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) { int v = atomic_read(&spt->refcount); trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); - atomic_inc(&spt->refcount); } +static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) +{ + int v = atomic_read(&spt->refcount); + + trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); + return atomic_dec_return(&spt->refcount); +} + static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, @@ -843,7 +961,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, pfn = ops->get_pfn(entry); type = spt->shadow_page.type; - if (pfn == vgpu->gtt.scratch_pt[type].page_mfn) + /* Uninitialized spte or unshadowed spte. */ + if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) return; intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); @@ -855,14 +974,11 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) struct intel_gvt_gtt_entry e; unsigned long index; int ret; - int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, spt->guest_page.gfn, spt->shadow_page.type); - trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); - - if (atomic_dec_return(&spt->refcount) > 0) + if (ppgtt_put_spt(spt) > 0) return 0; for_each_present_shadow_entry(spt, &e, index) { @@ -871,9 +987,15 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) gvt_vdbg_mm("invalidate 4K entry\n"); ppgtt_invalidate_pte(spt, &e); break; + case GTT_TYPE_PPGTT_PTE_64K_ENTRY: + /* We don't setup 64K shadow entry so far. */ + WARN(1, "suspicious 64K gtt entry\n"); + continue; case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + gvt_vdbg_mm("invalidate 2M entry\n"); + continue; case GTT_TYPE_PPGTT_PTE_1G_ENTRY: - WARN(1, "GVT doesn't support 2M/1GB page\n"); + WARN(1, "GVT doesn't support 1GB page\n"); continue; case GTT_TYPE_PPGTT_PML4_ENTRY: case GTT_TYPE_PPGTT_PDP_ENTRY: @@ -899,6 +1021,22 @@ fail: return ret; } +static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) { + u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & + GAMW_ECO_ENABLE_64K_IPS_FIELD; + + return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD; + } else if (INTEL_GEN(dev_priv) >= 11) { + /* 64K paging only controlled by IPS bit in PTE now. */ + return true; + } else + return false; +} + static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( @@ -906,35 +1044,54 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *spt = NULL; + bool ips = false; int ret; GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); + if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY) + ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we); + spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); - if (spt) + if (spt) { ppgtt_get_spt(spt); - else { + + if (ips != spt->guest_page.pde_ips) { + spt->guest_page.pde_ips = ips; + + gvt_dbg_mm("reshadow PDE since ips changed\n"); + clear_page(spt->shadow_page.vaddr); + ret = ppgtt_populate_spt(spt); + if (ret) { + ppgtt_put_spt(spt); + goto err; + } + } + } else { int type = get_next_pt_type(we->type); - spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we)); + spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); if (IS_ERR(spt)) { ret = PTR_ERR(spt); - goto fail; + goto err; } ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); if (ret) - goto fail; + goto err_free_spt; ret = ppgtt_populate_spt(spt); if (ret) - goto fail; + goto err_free_spt; trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, spt->shadow_page.type); } return spt; -fail: + +err_free_spt: + ppgtt_free_spt(spt); +err: gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", spt, we->val64, we->type); return ERR_PTR(ret); @@ -948,16 +1105,118 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, se->type = ge->type; se->val64 = ge->val64; + /* Because we always split 64KB pages, so clear IPS in shadow PDE. */ + if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY) + ops->clear_ips(se); + ops->set_pfn(se, s->shadow_page.mfn); } +/** + * Return 1 if 2MB huge gtt shadowing is possilbe, 0 if miscondition, + * negtive if found err. + */ +static int is_2MB_gtt_possible(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + + if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M)) + return 0; + + pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry)); + if (pfn == INTEL_GVT_INVALID_ADDR) + return -EINVAL; + + return PageTransHuge(pfn_to_page(pfn)); +} + +static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *se) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_vgpu_ppgtt_spt *sub_spt; + struct intel_gvt_gtt_entry sub_se; + unsigned long start_gfn; + dma_addr_t dma_addr; + unsigned long sub_index; + int ret; + + gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index); + + start_gfn = ops->get_pfn(se); + + sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT); + if (IS_ERR(sub_spt)) + return PTR_ERR(sub_spt); + + for_each_shadow_entry(sub_spt, &sub_se, sub_index) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, + start_gfn + sub_index, PAGE_SIZE, &dma_addr); + if (ret) { + ppgtt_invalidate_spt(spt); + return ret; + } + sub_se.val64 = se->val64; + + /* Copy the PAT field from PDE. */ + sub_se.val64 &= ~_PAGE_PAT; + sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5; + + ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT); + ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index); + } + + /* Clear dirty field. */ + se->val64 &= ~_PAGE_DIRTY; + + ops->clear_pse(se); + ops->clear_ips(se); + ops->set_pfn(se, sub_spt->shadow_page.mfn); + ppgtt_set_shadow_entry(spt, se, index); + return 0; +} + +static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, + struct intel_vgpu_ppgtt_spt *spt, unsigned long index, + struct intel_gvt_gtt_entry *se) +{ + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + struct intel_gvt_gtt_entry entry = *se; + unsigned long start_gfn; + dma_addr_t dma_addr; + int i, ret; + + gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index); + + GEM_BUG_ON(index % GTT_64K_PTE_STRIDE); + + start_gfn = ops->get_pfn(se); + + entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY; + ops->set_64k_splited(&entry); + + for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, + start_gfn + i, PAGE_SIZE, &dma_addr); + if (ret) + return ret; + + ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT); + ppgtt_set_shadow_entry(spt, &entry, index + i); + } + return 0; +} + static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, struct intel_vgpu_ppgtt_spt *spt, unsigned long index, struct intel_gvt_gtt_entry *ge) { struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry se = *ge; - unsigned long gfn; + unsigned long gfn, page_size = PAGE_SIZE; dma_addr_t dma_addr; int ret; @@ -970,16 +1229,33 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, case GTT_TYPE_PPGTT_PTE_4K_ENTRY: gvt_vdbg_mm("shadow 4K gtt entry\n"); break; + case GTT_TYPE_PPGTT_PTE_64K_ENTRY: + gvt_vdbg_mm("shadow 64K gtt entry\n"); + /* + * The layout of 64K page is special, the page size is + * controlled by uper PDE. To be simple, we always split + * 64K page to smaller 4K pages in shadow PT. + */ + return split_64KB_gtt_entry(vgpu, spt, index, &se); case GTT_TYPE_PPGTT_PTE_2M_ENTRY: + gvt_vdbg_mm("shadow 2M gtt entry\n"); + ret = is_2MB_gtt_possible(vgpu, ge); + if (ret == 0) + return split_2MB_gtt_entry(vgpu, spt, index, &se); + else if (ret < 0) + return ret; + page_size = I915_GTT_PAGE_SIZE_2M; + break; case GTT_TYPE_PPGTT_PTE_1G_ENTRY: - gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n"); + gvt_vgpu_err("GVT doesn't support 1GB entry\n"); return -EINVAL; default: GEM_BUG_ON(1); }; /* direct shadow */ - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size, + &dma_addr); if (ret) return -ENXIO; @@ -1062,8 +1338,12 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, ret = ppgtt_invalidate_spt(s); if (ret) goto fail; - } else + } else { + /* We don't setup 64K shadow entry so far. */ + WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, + "suspicious 64K entry\n"); ppgtt_invalidate_pte(spt, se); + } return 0; fail: @@ -1286,7 +1566,7 @@ static int ppgtt_handle_guest_write_page_table( struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry old_se; int new_present; - int ret; + int i, ret; new_present = ops->test_present(we); @@ -1308,8 +1588,27 @@ static int ppgtt_handle_guest_write_page_table( goto fail; if (!new_present) { - ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn); - ppgtt_set_shadow_entry(spt, &old_se, index); + /* For 64KB splited entries, we need clear them all. */ + if (ops->test_64k_splited(&old_se) && + !(index % GTT_64K_PTE_STRIDE)) { + gvt_vdbg_mm("remove splited 64K shadow entries\n"); + for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { + ops->clear_64k_splited(&old_se); + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index + i); + } + } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY || + old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { + ops->clear_pse(&old_se); + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); + } else { + ops->set_pfn(&old_se, + vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &old_se, index); + } } return 0; @@ -1391,7 +1690,17 @@ static int ppgtt_handle_guest_write_page_table_bytes( ppgtt_get_guest_entry(spt, &we, index); - ops->test_pse(&we); + /* + * For page table which has 64K gtt entry, only PTE#0, PTE#16, + * PTE#32, ... PTE#496 are used. Unused PTEs update should be + * ignored. + */ + if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && + (index % GTT_64K_PTE_STRIDE)) { + gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", + index); + return 0; + } if (bytes == info->gtt_entry_size) { ret = ppgtt_handle_guest_write_page_table(spt, &we, index); @@ -1880,7 +2189,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, - &dma_addr); + PAGE_SIZE, &dma_addr); if (ret) { gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 3792f2b7f4ff..b7bf68cc8418 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -63,6 +63,12 @@ struct intel_gvt_gtt_pte_ops { void (*clear_present)(struct intel_gvt_gtt_entry *e); void (*set_present)(struct intel_gvt_gtt_entry *e); bool (*test_pse)(struct intel_gvt_gtt_entry *e); + void (*clear_pse)(struct intel_gvt_gtt_entry *e); + bool (*test_ips)(struct intel_gvt_gtt_entry *e); + void (*clear_ips)(struct intel_gvt_gtt_entry *e); + bool (*test_64k_splited)(struct intel_gvt_gtt_entry *e); + void (*clear_64k_splited)(struct intel_gvt_gtt_entry *e); + void (*set_64k_splited)(struct intel_gvt_gtt_entry *e); void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn); unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e); }; @@ -95,6 +101,7 @@ typedef enum { GTT_TYPE_GGTT_PTE, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_64K_ENTRY, GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PTE_1G_ENTRY, @@ -220,6 +227,7 @@ struct intel_vgpu_ppgtt_spt { struct { intel_gvt_gtt_type_t type; + bool pde_ips; /* for 64KB PTEs */ void *vaddr; struct page *page; unsigned long mfn; @@ -227,6 +235,7 @@ struct intel_vgpu_ppgtt_spt { struct { intel_gvt_gtt_type_t type; + bool pde_ips; /* for 64KB PTEs */ unsigned long gfn; unsigned long write_cnt; struct intel_vgpu_oos_page *oos_page; diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 4e65266e7b95..712f9d14e720 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -468,3 +468,7 @@ out_clean_idr: kfree(gvt); return ret; } + +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) +MODULE_SOFTDEP("pre: kvmgt"); +#endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0bc0c5418adb..e2e252c67de8 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -210,6 +210,31 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, return 0; } +static int gamw_echo_dev_rw_ia_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 ips = (*(u32 *)p_data) & GAMW_ECO_ENABLE_64K_IPS_FIELD; + + if (INTEL_GEN(vgpu->gvt->dev_priv) <= 10) { + if (ips == GAMW_ECO_ENABLE_64K_IPS_FIELD) + gvt_dbg_core("vgpu%d: ips enabled\n", vgpu->id); + else if (!ips) + gvt_dbg_core("vgpu%d: ips disabled\n", vgpu->id); + else { + /* All engines must be enabled together for vGPU, + * since we don't know which engine the ppgtt will + * bind to when shadowing. + */ + gvt_vgpu_err("Unsupported IPS setting %x, cannot enable 64K gtt.\n", + ips); + return -EINVAL; + } + } + + write_vreg(vgpu, offset, p_data, bytes); + return 0; +} + static int fence_mmio_read(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { @@ -1559,6 +1584,13 @@ static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu, return 0; } +static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + vgpu_vreg(vgpu, offset) = 0; + return 0; +} + static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1769,7 +1801,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL, + gamw_echo_dev_rw_ia_write); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); @@ -3155,6 +3189,9 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT); MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT); + MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); + MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write); + MMIO_D(RC6_CTX_BASE, D_BXT); MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT); diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f6dd9f717888..5af11cf1b482 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -53,7 +53,7 @@ struct intel_gvt_mpt { unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr); + unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1466d8769ec9..685cb3de6dab 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -94,6 +94,7 @@ struct gvt_dma { struct rb_node dma_addr_node; gfn_t gfn; dma_addr_t dma_addr; + unsigned long size; struct kref ref; }; @@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size) +{ + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); + WARN_ON(ret != 1); + } +} + +/* Pin a normal or compound guest page for dma. */ +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size, struct page **page) +{ + unsigned long base_pfn = 0; + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + /* + * We pin the pages one-by-one to avoid allocating a big arrary + * on stack to hold pfns. + */ + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + unsigned long pfn; + + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", + cur_gfn, ret); + goto err; + } + + if (!pfn_valid(pfn)) { + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); + npage++; + ret = -EFAULT; + goto err; + } + + if (npage == 0) + base_pfn = pfn; + else if (base_pfn + npage != pfn) { + gvt_vgpu_err("The pages are not continuous\n"); + ret = -EINVAL; + npage++; + goto err; + } + } + + *page = pfn_to_page(base_pfn); + return 0; +err: + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); + return ret; +} + static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t *dma_addr) + dma_addr_t *dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - struct page *page; - unsigned long pfn; + struct page *page = NULL; int ret; - /* Pin the page first. */ - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); - if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, ret); - return -EINVAL; - } + ret = gvt_pin_guest_page(vgpu, gfn, size, &page); + if (ret) + return ret; /* Setup DMA mapping. */ - page = pfn_to_page(pfn); - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, *dma_addr)) { - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - return -ENOMEM; + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); + ret = dma_mapping_error(dev, *dma_addr); + if (ret) { + gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", + page_to_pfn(page), ret); + gvt_unpin_guest_page(vgpu, gfn, size); } - return 0; + return ret; } static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - int ret; - dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - WARN_ON(ret != 1); + dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + gvt_unpin_guest_page(vgpu, gfn, size); } static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, @@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) } static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; @@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new->vgpu = vgpu; new->gfn = gfn; new->dma_addr = dma_addr; + new->size = size; kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. */ @@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) break; } dma = rb_entry(node, struct gvt_dma, gfn_node); - gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); } @@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (!entry) continue; - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(vgpu, entry); } mutex_unlock(&vgpu->vdev.cache_lock); @@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) } int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr) + unsigned long size, dma_addr_t *dma_addr) { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; @@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { - ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else { @@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, return 0; err_unmap: - gvt_dma_unmap_page(vgpu, gfn, *dma_addr); + gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: mutex_unlock(&info->vgpu->vdev.cache_lock); return ret; @@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); - gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(entry->vgpu, entry); } diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 32ffcd566cdd..67f19992b226 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( /** * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page * @vgpu: a vGPU - * @gpfn: guest pfn + * @gfn: guest pfn + * @size: page size * @dma_addr: retrieve allocated dma addr * * Returns: * 0 on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_dma_map_guest_page( - struct intel_vgpu *vgpu, unsigned long gfn, + struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size, dma_addr); } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 889d10f8ee96..aa063b275e81 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -46,6 +46,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT; vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); |