From 09120d4e88b13967d44d46280fb74d3ac4ac2f73 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 29 Jul 2015 17:23:45 +0100 Subject: drm/i915: Remove unnecessary gen8_clamp_pd gen8_clamp_pd clamps to the next page directory boundary, but the macro gen8_for_each_pde already has a check to stop at the page directory boundary. Furthermore, i915_pte_count also restricts to the next page table boundary. v2: Rebase after Mika's ppgtt cleanup / scratch merge patch series. Suggested-by: Akash Goel Signed-off-by: Michel Thierry Reviewed-by: "Akash Goel" Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e1cfa292f9ad..d5bf953deff9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -444,17 +444,6 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp, length), \ start += temp, length -= temp) -/* Clamp length to the next page_directory boundary */ -static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length) -{ - uint64_t next_pd = ALIGN(start + 1, 1 << GEN8_PDPE_SHIFT); - - if (next_pd > (start + length)) - return length; - - return next_pd - start; -} - static inline uint32_t gen8_pte_index(uint64_t address) { return i915_pte_index(address, GEN8_PDE_SHIFT); -- cgit v1.2.3 From 6ac1850220732f47bc6ae767fa41542009674ad7 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 29 Jul 2015 17:23:46 +0100 Subject: drm/i915/gen8: Make pdp allocation more dynamic This transitional patch doesn't do much for the existing code. However, it should make upcoming patches to use the full 48b address space a bit easier. 32-bit ppgtt uses just 4 PDPs, while 48-bit ppgtt will have up-to 512; this patch prepares the existing functions to query the right number of pdps at run-time. This also means that used_pdpes should also be allocated during ppgtt_init, as the bitmap size will depend on the ppgtt address range selected. v2: Renamed pdp_free to be similar to pd/pt (unmap_and_free_pdp). v3: To facilitate testing, 48b mode will be available on Broadwell and GEN9+, when i915.enable_ppgtt = 3. v4: Rebase after s/page_tables/page_table/, added extra information about 4-level page table formats and use IS_ENABLED macro. v5: Check CONFIG_X86_64 instead of CONFIG_64BIT. v6: Rebase after Mika's ppgtt cleanup / scratch merge patch series, and follow his nomenclature in pdp functions (there is no alloc_pdp yet). v7: Rebase after merged version of Mika's ppgtt cleanup patch series. v8: Rebase after final merged version of Mika's ppgtt/scratch patches. v9: Introduce PML4 (and 48-bit checks) until next patch (Akash). v10: Also use test_bit to detect when pd/pt are already allocated (Akash) Cc: Akash Goel Signed-off-by: Ben Widawsky Signed-off-by: Michel Thierry (v2+) Reviewed-by: Akash Goel [danvet: Amend commit message as suggested by Michel.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 86 +++++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.h | 17 +++++--- 2 files changed, 80 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 566466fe1917..6724e71cf1d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -522,6 +522,43 @@ static void gen8_initialize_pd(struct i915_address_space *vm, fill_px(vm->dev, pd, scratch_pde); } +static int __pdp_init(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) +{ + size_t pdpes = I915_PDPES_PER_PDP(dev); + + pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), + sizeof(unsigned long), + GFP_KERNEL); + if (!pdp->used_pdpes) + return -ENOMEM; + + pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL); + if (!pdp->page_directory) { + kfree(pdp->used_pdpes); + /* the PDP might be the statically allocated top level. Keep it + * as clean as possible */ + pdp->used_pdpes = NULL; + return -ENOMEM; + } + + return 0; +} + +static void __pdp_fini(struct i915_page_directory_pointer *pdp) +{ + kfree(pdp->used_pdpes); + kfree(pdp->page_directory); + pdp->page_directory = NULL; +} + +static void free_pdp(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) +{ + __pdp_fini(pdp); +} + /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, @@ -720,7 +757,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); int i; - for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) { + for_each_set_bit(i, ppgtt->pdp.used_pdpes, + I915_PDPES_PER_PDP(ppgtt->base.dev)) { if (WARN_ON(!ppgtt->pdp.page_directory[i])) continue; @@ -729,6 +767,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) free_pd(ppgtt->base.dev, ppgtt->pdp.page_directory[i]); } + free_pdp(ppgtt->base.dev, &ppgtt->pdp); gen8_free_scratch(vm); } @@ -763,7 +802,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt, gen8_for_each_pde(pt, pd, start, length, temp, pde) { /* Don't reallocate page tables */ - if (pt) { + if (test_bit(pde, pd->used_pdes)) { /* Scratch is never allocated this way */ WARN_ON(pt == ppgtt->base.scratch_pt); continue; @@ -820,11 +859,12 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory *pd; uint64_t temp; uint32_t pdpe; + uint32_t pdpes = I915_PDPES_PER_PDP(dev); - WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES)); + WARN_ON(!bitmap_empty(new_pds, pdpes)); gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { - if (pd) + if (test_bit(pdpe, pdp->used_pdpes)) continue; pd = alloc_pd(dev); @@ -839,18 +879,19 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt, return 0; unwind_out: - for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES) + for_each_set_bit(pdpe, new_pds, pdpes) free_pd(dev, pdp->page_directory[pdpe]); return -ENOMEM; } static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, + uint32_t pdpes) { int i; - for (i = 0; i < GEN8_LEGACY_PDPES; i++) + for (i = 0; i < pdpes; i++) kfree(new_pts[i]); kfree(new_pts); kfree(new_pds); @@ -861,23 +902,24 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) */ static int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long ***new_pts) + unsigned long ***new_pts, + uint32_t pdpes) { int i; unsigned long *pds; unsigned long **pts; - pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL); + pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL); if (!pds) return -ENOMEM; - pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL); + pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL); if (!pts) { kfree(pds); return -ENOMEM; } - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + for (i = 0; i < pdpes; i++) { pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES), sizeof(unsigned long), GFP_KERNEL); if (!pts[i]) @@ -890,7 +932,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, return 0; err_out: - free_gen8_temp_bitmaps(pds, pts); + free_gen8_temp_bitmaps(pds, pts, pdpes); return -ENOMEM; } @@ -916,6 +958,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, const uint64_t orig_length = length; uint64_t temp; uint32_t pdpe; + uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev); int ret; /* Wrap is never okay since we can only represent 48b, and we don't @@ -927,7 +970,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, if (WARN_ON(start + length > ppgtt->base.total)) return -ENODEV; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); if (ret) return ret; @@ -935,7 +978,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length, new_page_dirs); if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); return ret; } @@ -989,7 +1032,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, __set_bit(pdpe, ppgtt->pdp.used_pdpes); } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); mark_tlbs_dirty(ppgtt); return 0; @@ -999,10 +1042,10 @@ err_out: free_pt(vm->dev, ppgtt->pdp.page_directory[pdpe]->page_table[temp]); } - for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES) + for_each_set_bit(pdpe, new_page_dirs, pdpes) free_pd(vm->dev, ppgtt->pdp.page_directory[pdpe]); - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); mark_tlbs_dirty(ppgtt); return ret; } @@ -1040,7 +1083,16 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_mm_switch; + ret = __pdp_init(false, &ppgtt->pdp); + + if (ret) + goto free_scratch; + return 0; + +free_scratch: + gen8_free_scratch(&ppgtt->base); + return ret; } static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index d5bf953deff9..87e389c6c9c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -98,6 +98,9 @@ typedef uint64_t gen8_pde_t; #define GEN8_LEGACY_PDPES 4 #define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) +/* FIXME: Next patch will use dev */ +#define I915_PDPES_PER_PDP(dev) GEN8_LEGACY_PDPES + #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ @@ -241,9 +244,10 @@ struct i915_page_directory { }; struct i915_page_directory_pointer { - /* struct page *page; */ - DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES); - struct i915_page_directory *page_directory[GEN8_LEGACY_PDPES]; + struct i915_page_dma base; + + unsigned long *used_pdpes; + struct i915_page_directory **page_directory; }; struct i915_address_space { @@ -436,9 +440,10 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp, length), \ start += temp, length -= temp) -#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ - for (iter = gen8_pdpe_index(start); \ - pd = (pdp)->page_directory[iter], length > 0 && iter < GEN8_LEGACY_PDPES; \ +#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ + for (iter = gen8_pdpe_index(start); \ + pd = (pdp)->page_directory[iter], \ + length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \ iter++, \ temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start, \ temp = min(temp, length), \ -- cgit v1.2.3 From 81ba8aefd03803a8aec3395d18f7b1dda5942105 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 3 Aug 2015 09:52:01 +0100 Subject: drm/i915/gen8: Add PML4 structure Introduces the Page Map Level 4 (PML4), ie. the new top level structure of the page tables. To facilitate testing, 48b mode will be available on Broadwell and GEN9+, when i915.enable_ppgtt = 3. v2: Remove unnecessary CONFIG_X86_64 checks, ppgtt code is already 32/64-bit safe (Chris). v3: Add goto free_scratch in temp 48-bit mode init code (Akash). v4: kfree the pdp until the 4lvl alloc/free patch (Akash). v5: Postpone 48-bit code in sanitize_enable_ppgtt (Akash). v6: Keep _insert_pte_entries changes outside this patch (Akash). Cc: Akash Goel Signed-off-by: Michel Thierry Reviewed-by: Akash Goel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 27 +++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +++++++++++++++++++++----- 3 files changed, 40 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4273c281ce77..79d2dcb13bb0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2510,7 +2510,8 @@ struct drm_i915_cmd_table { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8) #define USES_PPGTT(dev) (i915.enable_ppgtt) -#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt == 2) +#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2) +#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 72f4777f06fe..da7863b5c22b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1105,14 +1105,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; ppgtt->base.start = 0; - ppgtt->base.total = 1ULL << 32; - if (IS_ENABLED(CONFIG_X86_32)) - /* While we have a proliferation of size_t variables - * we cannot represent the full ppgtt size on 32bit, - * so limit it to the same size as the GGTT (currently - * 2GiB). - */ - ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; @@ -1122,10 +1114,25 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_mm_switch; - ret = __pdp_init(false, &ppgtt->pdp); + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + ret = __pdp_init(false, &ppgtt->pdp); - if (ret) + if (ret) + goto free_scratch; + + ppgtt->base.total = 1ULL << 32; + if (IS_ENABLED(CONFIG_X86_32)) + /* While we have a proliferation of size_t variables + * we cannot represent the full ppgtt size on 32bit, + * so limit it to the same size as the GGTT (currently + * 2GiB). + */ + ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; + } else { + ppgtt->base.total = 1ULL << 48; + ret = -EPERM; /* Not yet implemented */ goto free_scratch; + } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 87e389c6c9c0..04bc66f113a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -88,9 +88,17 @@ typedef uint64_t gen8_pde_t; * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. + * + * GEN8 48b legacy style address is defined as a 4 level page table: + * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 + * PML4E | PDPE | PDE | PTE | offset */ +#define GEN8_PML4ES_PER_PML4 512 +#define GEN8_PML4E_SHIFT 39 #define GEN8_PDPE_SHIFT 30 -#define GEN8_PDPE_MASK 0x3 +/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page + * tables */ +#define GEN8_PDPE_MASK 0x1ff #define GEN8_PDE_SHIFT 21 #define GEN8_PDE_MASK 0x1ff #define GEN8_PTE_SHIFT 12 @@ -98,8 +106,8 @@ typedef uint64_t gen8_pde_t; #define GEN8_LEGACY_PDPES 4 #define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) -/* FIXME: Next patch will use dev */ -#define I915_PDPES_PER_PDP(dev) GEN8_LEGACY_PDPES +#define I915_PDPES_PER_PDP(dev) (USES_FULL_48BIT_PPGTT(dev) ?\ + GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -250,6 +258,13 @@ struct i915_page_directory_pointer { struct i915_page_directory **page_directory; }; +struct i915_pml4 { + struct i915_page_dma base; + + DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); + struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; +}; + struct i915_address_space { struct drm_mm mm; struct drm_device *dev; @@ -345,8 +360,9 @@ struct i915_hw_ppgtt { struct drm_mm_node node; unsigned long pd_dirty_rings; union { - struct i915_page_directory_pointer pdp; - struct i915_page_directory pd; + struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ + struct i915_page_directory_pointer pdp; /* GEN8+ */ + struct i915_page_directory pd; /* GEN6-7 */ }; struct drm_i915_file_private *file_priv; -- cgit v1.2.3 From 762d99363dc9bc436f39f8bdc3f8670ea272a5a9 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 30 Jul 2015 11:05:29 +0100 Subject: drm/i915/gen8: implement alloc/free for 4lvl PML4 has no special attributes, and there will always be a PML4. So simply initialize it at creation, and destroy it at the end. The code for 4lvl is able to call into the existing 3lvl page table code to handle all of the lower levels. v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the compiler happy. And define ret only in one place. Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl. v3: Use i915_dma_unmap_single instead of pci API. Fix a couple of incorrect checks when unmapping pdp and pd pages (Akash). v4: Call __pdp_fini also for 32b PPGTT. Clean up alloc_pdp param list. v5: Prevent (harmless) out of range access in gen8_for_each_pml4e. v6: Simplify alloc_vma_range_4lvl and gen8_ppgtt_init_common error paths. (Akash) v7: Rebase, s/gen8_ppgtt_free_*/gen8_ppgtt_cleanup_*/. v8: Change location of pml4_init/fini. It will make next patches cleaner. v9: Rebase after Mika's ppgtt cleanup / scratch merge patch series, while trying to reuse as much as possible for pdp alloc. pml4_init/fini replaced by setup/cleanup_px macros. v10: Rebase after Mika's merged ppgtt cleanup patch series. v11: Rebase after final merged version of Mika's ppgtt/scratch patches. v12: Fix pdpe start value in trace (Akash) v13: Define all 4lvl functions in this patch directly, instead of previous patches, add i915_page_directory_pointer_entry_alloc here, use test_bit to detect when pdp is already allocated (Akash). v14: Move pdp allocation into a new gen8_ppgtt_alloc_page_dirpointers funtion, as we do for pds and pts; move pd and pdp setup functions to this patch (Akash). v15: Added kfree(pdp) from previous patch to this (Akash). Cc: Akash Goel Signed-off-by: Ben Widawsky Signed-off-by: Michel Thierry (v2+) Reviewed-by: Akash Goel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 239 +++++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_gtt.h | 15 ++- drivers/gpu/drm/i915/i915_trace.h | 8 ++ 3 files changed, 246 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index da7863b5c22b..229a31760260 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -204,6 +204,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, return pde; } +#define gen8_pdpe_encode gen8_pde_encode +#define gen8_pml4e_encode gen8_pde_encode + static gen6_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid, u32 unused) @@ -553,10 +556,73 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp) pdp->page_directory = NULL; } +static struct +i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) +{ + struct i915_page_directory_pointer *pdp; + int ret = -ENOMEM; + + WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); + + pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); + if (!pdp) + return ERR_PTR(-ENOMEM); + + ret = __pdp_init(dev, pdp); + if (ret) + goto fail_bitmap; + + ret = setup_px(dev, pdp); + if (ret) + goto fail_page_m; + + return pdp; + +fail_page_m: + __pdp_fini(pdp); +fail_bitmap: + kfree(pdp); + + return ERR_PTR(ret); +} + static void free_pdp(struct drm_device *dev, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); + if (USES_FULL_48BIT_PPGTT(dev)) { + cleanup_px(dev, pdp); + kfree(pdp); + } +} + +static void +gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + int index) +{ + gen8_ppgtt_pdpe_t *page_directorypo; + + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + return; + + page_directorypo = kmap_px(pdp); + page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_px(ppgtt, page_directorypo); +} + +static void +gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, + struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + int index) +{ + gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); + + WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); + pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_px(ppgtt, pagemap); } /* Broadwell Page Directory Pointer Descriptors */ @@ -782,12 +848,9 @@ static void gen8_free_scratch(struct i915_address_space *vm) free_scratch_page(dev, vm->scratch_page); } -static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, + struct i915_page_directory_pointer *pdp) { - struct i915_hw_ppgtt *ppgtt = - container_of(vm, struct i915_hw_ppgtt, base); - struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */ - struct drm_device *dev = ppgtt->base.dev; int i; for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { @@ -799,6 +862,31 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) } free_pdp(dev, pdp); +} + +static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) +{ + int i; + + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { + if (WARN_ON(!ppgtt->pml4.pdps[i])) + continue; + + gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); + } + + cleanup_px(ppgtt->base.dev, &ppgtt->pml4); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); + else + gen8_ppgtt_cleanup_4lvl(ppgtt); gen8_free_scratch(vm); } @@ -920,6 +1008,60 @@ unwind_out: return -ENOMEM; } +/** + * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. + * @vm: Master vm structure. + * @pml4: Page map level 4 for this address range. + * @start: Starting virtual address to begin allocations. + * @length: Size of the allocations. + * @new_pdps: Bitmap set by function with new allocations. Likely used by the + * caller to free on error. + * + * Allocate the required number of page directory pointers. Extremely similar to + * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). + * The main difference is here we are limited by the pml4 boundary (instead of + * the page directory pointer). + * + * Return: 0 if success; negative error code otherwise. + */ +static int +gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, + struct i915_pml4 *pml4, + uint64_t start, + uint64_t length, + unsigned long *new_pdps) +{ + struct drm_device *dev = vm->dev; + struct i915_page_directory_pointer *pdp; + uint64_t temp; + uint32_t pml4e; + + WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); + + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + if (!test_bit(pml4e, pml4->used_pml4es)) { + pdp = alloc_pdp(dev); + if (IS_ERR(pdp)) + goto unwind_out; + + pml4->pdps[pml4e] = pdp; + __set_bit(pml4e, new_pdps); + trace_i915_page_directory_pointer_entry_alloc(vm, + pml4e, + start, + GEN8_PML4E_SHIFT); + } + } + + return 0; + +unwind_out: + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) + free_pdp(dev, pml4->pdps[pml4e]); + + return -ENOMEM; +} + static void free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, uint32_t pdpes) @@ -981,14 +1123,15 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; } -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) +static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + uint64_t start, + uint64_t length) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); unsigned long *new_page_dirs, **new_page_tables; struct drm_device *dev = vm->dev; - struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */ struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -1069,6 +1212,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, kunmap_px(ppgtt, page_directory); __set_bit(pdpe, pdp->used_pdpes); + gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); } free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); @@ -1089,6 +1233,68 @@ err_out: return ret; } +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, + struct i915_pml4 *pml4, + uint64_t start, + uint64_t length) +{ + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct i915_page_directory_pointer *pdp; + uint64_t temp, pml4e; + int ret = 0; + + /* Do the pml4 allocations first, so we don't need to track the newly + * allocated tables below the pdp */ + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); + + /* The pagedirectory and pagetable allocations are done in the shared 3 + * and 4 level code. Just allocate the pdps. + */ + ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, + new_pdps); + if (ret) + return ret; + + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, + "The allocation has spanned more than 512GB. " + "It is highly likely this is incorrect."); + + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + WARN_ON(!pdp); + + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); + if (ret) + goto err_out; + + gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); + } + + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, + GEN8_PML4ES_PER_PML4); + + return 0; + +err_out: + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) + gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); + + return ret; +} + +static int gen8_alloc_va_range(struct i915_address_space *vm, + uint64_t start, uint64_t length) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + + if (USES_FULL_48BIT_PPGTT(vm->dev)) + return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); + else + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1114,9 +1320,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_mm_switch; - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { - ret = __pdp_init(false, &ppgtt->pdp); + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); + if (ret) + goto free_scratch; + ppgtt->base.total = 1ULL << 48; + } else { + ret = __pdp_init(false, &ppgtt->pdp); if (ret) goto free_scratch; @@ -1128,10 +1339,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) * 2GiB). */ ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; - } else { - ppgtt->base.total = 1ULL << 48; - ret = -EPERM; /* Not yet implemented */ - goto free_scratch; + + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, + 0, 0, + GEN8_PML4E_SHIFT); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 04bc66f113a6..11d44b3d84a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,6 +39,8 @@ struct drm_i915_file_private; typedef uint32_t gen6_pte_t; typedef uint64_t gen8_pte_t; typedef uint64_t gen8_pde_t; +typedef uint64_t gen8_ppgtt_pdpe_t; +typedef uint64_t gen8_ppgtt_pml4e_t; #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT) @@ -95,6 +97,7 @@ typedef uint64_t gen8_pde_t; */ #define GEN8_PML4ES_PER_PML4 512 #define GEN8_PML4E_SHIFT 39 +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) #define GEN8_PDPE_SHIFT 30 /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ @@ -465,6 +468,15 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp, length), \ start += temp, length -= temp) +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ + for (iter = gen8_pml4e_index(start); \ + pdp = (pml4)->pdps[iter], \ + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ + iter++, \ + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ + temp = min(temp, length), \ + start += temp, length -= temp) + static inline uint32_t gen8_pte_index(uint64_t address) { return i915_pte_index(address, GEN8_PDE_SHIFT); @@ -482,8 +494,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) static inline uint32_t gen8_pml4e_index(uint64_t address) { - WARN_ON(1); /* For 64B */ - return 0; + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } static inline size_t gen8_pte_count(uint64_t address, uint64_t length) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index f230d7639000..e6b5c7470ba0 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -221,6 +221,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, __entry->vm, __entry->px, __entry->start, __entry->end) ); +DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, + TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), + TP_ARGS(vm, pml4e, start, pml4e_shift), + + TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", + __entry->vm, __entry->px, __entry->start, __entry->end) +); + /* Avoid extra math because we only support two sizes. The format is defined by * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ #define TRACE_PT_SIZE(bits) \ -- cgit v1.2.3 From 69ab76fd3d497816992b22dd201d2327cb921c94 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 29 Jul 2015 17:23:55 +0100 Subject: drm/i915/gen8: Initialize PDPs and PML4 Similar to PDs, while setting up a page directory pointer, make all entries of the pdp point to the scratch pd before mapping (and make all its entries point to the scratch page); this is to be safe in case of out of bound access or proactive prefetch. Also add a scratch pdp, which the PML4 entries point to. v2: Handle scratch_pdp allocation failure correctly, and keep initialize_px functions together (Akash) v3: Rebase after Mika's ppgtt cleanup / scratch merge patch series. Rely on the added macros to initialize the pdps. v4: Rebase after final merged version of Mika's ppgtt/scratch patches (and removed commit message part related to v3). v5: Update commit message to also mention PML4 table initialization and the new scratch pdp (Akash). Suggested-by: Akash Goel Signed-off-by: Michel Thierry Reviewed-by: Akash Goel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 2 files changed, 39 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 032801ef5025..a4bdef30713c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -596,6 +596,27 @@ static void free_pdp(struct drm_device *dev, } } +static void gen8_initialize_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp) +{ + gen8_ppgtt_pdpe_t scratch_pdpe; + + scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); + + fill_px(vm->dev, pdp, scratch_pdpe); +} + +static void gen8_initialize_pml4(struct i915_address_space *vm, + struct i915_pml4 *pml4) +{ + gen8_ppgtt_pml4e_t scratch_pml4e; + + scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), + I915_CACHE_LLC); + + fill_px(vm->dev, pml4, scratch_pml4e); +} + static void gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, @@ -860,8 +881,20 @@ static int gen8_init_scratch(struct i915_address_space *vm) return PTR_ERR(vm->scratch_pd); } + if (USES_FULL_48BIT_PPGTT(dev)) { + vm->scratch_pdp = alloc_pdp(dev); + if (IS_ERR(vm->scratch_pdp)) { + free_pd(dev, vm->scratch_pd); + free_pt(dev, vm->scratch_pt); + free_scratch_page(dev, vm->scratch_page); + return PTR_ERR(vm->scratch_pdp); + } + } + gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); + if (USES_FULL_48BIT_PPGTT(dev)) + gen8_initialize_pdp(vm, vm->scratch_pdp); return 0; } @@ -870,6 +903,8 @@ static void gen8_free_scratch(struct i915_address_space *vm) { struct drm_device *dev = vm->dev; + if (USES_FULL_48BIT_PPGTT(dev)) + free_pdp(dev, vm->scratch_pdp); free_pd(dev, vm->scratch_pd); free_pt(dev, vm->scratch_pt); free_scratch_page(dev, vm->scratch_page); @@ -1071,6 +1106,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, if (IS_ERR(pdp)) goto unwind_out; + gen8_initialize_pdp(vm, pdp); pml4->pdps[pml4e] = pdp; __set_bit(pml4e, new_pdps); trace_i915_page_directory_pointer_entry_alloc(vm, @@ -1350,6 +1386,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; + gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); + ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; } else { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 11d44b3d84a3..70c50e7c13f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -278,6 +278,7 @@ struct i915_address_space { struct i915_page_scratch *scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; + struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** * List of objects currently involved in rendering. -- cgit v1.2.3 From 088e0df4020e30c4952f29dc672ceb4742b98e73 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 7 Aug 2015 17:40:17 +0100 Subject: drm/i915/gtt: Allow >= 4GB offsets in X86_32 Similar to commit c44ef60e437019b8ca1dab8b4d2e8761fd4ce1e9 ("drm/i915/gtt: Allow >= 4GB sizes for vm"), i915_gem_obj_offset and i915_gem_obj_ggtt_offset return an unsigned long, which in only 4-bytes long in 32-bit kernels. Change return type (and other related offset variables) to u64. Since Global GTT is always limited to 4GB, this change would not be required in i915_gem_obj_ggtt_offset, but this is done for consistency. v2: Remove unnecessary offset variable in do_pin, as we already have vma->node.start (Chris). Update GGTT offset too (Tvrtko). Cc: Chris Wilson Cc: Daniel Vetter Cc: Tvrtko Ursulin Signed-off-by: Michel Thierry Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 12 +++++------- drivers/gpu/drm/i915/i915_gem.c | 18 +++++++----------- drivers/gpu/drm/i915/i915_gem_fence.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- 6 files changed, 20 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cefb6e96a941..6b85338c4b89 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2977,13 +2977,11 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags); -unsigned long -i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -unsigned long -i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm); -static inline unsigned long +u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view); +u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, + struct i915_address_space *vm); +static inline u64 i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) { return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73293b487217..6d0f8349a91c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4012,15 +4012,13 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, return -EBUSY; if (i915_vma_misplaced(vma, alignment, flags)) { - unsigned long offset; - offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : - i915_gem_obj_offset(obj, vm); WARN(vma->pin_count, "bo is already pinned in %s with incorrect alignment:" - " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," + " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", ggtt_view ? "ggtt" : "ppgtt", - offset, + upper_32_bits(vma->node.start), + lower_32_bits(vma->node.start), alignment, !!(flags & PIN_MAPPABLE), obj->map_and_fenceable); @@ -4975,9 +4973,8 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, } /* All the new VM stuff */ -unsigned long -i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm) +u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, + struct i915_address_space *vm) { struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; @@ -4997,9 +4994,8 @@ i915_gem_obj_offset(struct drm_i915_gem_object *o, return -1; } -unsigned long -i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) +u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view) { struct i915_address_space *ggtt = i915_obj_to_ggtt(o); struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index af1f8c461060..6077dffb318a 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -128,7 +128,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || (size & -size) != size || (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) @@ -171,7 +171,7 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || (size & -size) != size || (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", + "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", i915_gem_obj_ggtt_offset(obj), size); pitch_val = obj->stride / 128; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 38439fa339a9..7a89e97cbd3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2543,9 +2543,9 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } static int i915_gem_setup_global_gtt(struct drm_device *dev, - unsigned long start, - unsigned long mappable_end, - unsigned long end) + u64 start, + u64 mappable_end, + u64 end) { /* Let GEM Manage all of the aperture. * @@ -2584,7 +2584,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev, list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", + DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", i915_gem_obj_ggtt_offset(obj), obj->base.size); WARN_ON(i915_gem_obj_ggtt_bound(obj)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 70c50e7c13f9..82750073d5b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -149,7 +149,7 @@ struct i915_ggtt_view { union { struct { - unsigned long offset; + u64 offset; unsigned int size; } partial; } params; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 7eff33ff84f6..6c9351b2e3af 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -295,7 +295,7 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08lx, bo %p\n", + DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n", fb->width, fb->height, i915_gem_obj_ggtt_offset(obj), obj); -- cgit v1.2.3 From a9da512b3ed73045253afd778e40d4298f42905b Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 14 Sep 2015 15:19:57 -0300 Subject: drm/i915: avoid the last 8mb of stolen on BDW/SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FBC hardware for these platforms doesn't have access to the bios_reserved range, so it always assumes the maximum (8mb) is used. So avoid this range while allocating. This solves a bunch of FIFO underruns that happen if you end up putting the CFB in that memory range. On my machine, with 32mb of stolen, I need a 2560x1440 mode for that. Testcase: igt/kms_frontbuffer_tracking/fbc-* (given the right setup) Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_gem_stolen.c | 26 +++++++++++++++++++------- drivers/gpu/drm/i915/intel_fbc.c | 16 ++++++++++++++-- 4 files changed, 38 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d4240dba16ff..ffd4ab111957 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3175,6 +3175,10 @@ static inline void i915_gem_chipset_flush(struct drm_device *dev) int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node, u64 size, unsigned alignment); +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, + u64 end); void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node); int i915_gem_init_stolen(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 82750073d5b3..96ebb98a476d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -341,6 +341,7 @@ struct i915_gtt { struct i915_address_space base; size_t stolen_size; /* Total size of stolen memory */ + size_t stolen_usable_size; /* Total size minus BIOS reserved */ u64 mappable_end; /* End offset that we can CPU map */ struct io_mapping *mappable; /* Mapping to our CPU mappable region */ phys_addr_t mappable_base; /* PA of our GMADR */ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index bf26ecc3781b..081ef6d6c94f 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -42,9 +42,9 @@ * for is a boon. */ -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, u64 size, - unsigned alignment) +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, u64 end) { int ret; @@ -52,13 +52,23 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, return -ENODEV; mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_insert_node(&dev_priv->mm.stolen, node, size, alignment, - DRM_MM_SEARCH_DEFAULT); + ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size, + alignment, start, end, + DRM_MM_SEARCH_DEFAULT); mutex_unlock(&dev_priv->mm.stolen_lock); return ret; } +int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment) +{ + return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, + alignment, 0, + dev_priv->gtt.stolen_usable_size); +} + void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node) { @@ -355,9 +365,11 @@ int i915_gem_init_stolen(struct drm_device *dev) dev_priv->gtt.stolen_size >> 10, (dev_priv->gtt.stolen_size - reserved_total) >> 10); + dev_priv->gtt.stolen_usable_size = dev_priv->gtt.stolen_size - + reserved_total; + /* Basic memrange allocator for stolen space */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size - - reserved_total); + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_usable_size); return 0; } diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index db3809134a29..6f3c2ea75dd8 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -551,6 +551,16 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, { int compression_threshold = 1; int ret; + u64 end; + + /* The FBC hardware for BDW/SKL doesn't have access to the stolen + * reserved range size, so it always assumes the maximum (8mb) is used. + * If we enable FBC using a CFB on that memory range we'll get FIFO + * underruns, even if that range is not reserved by the BIOS. */ + if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) + end = dev_priv->gtt.stolen_size - 8 * 1024 * 1024; + else + end = dev_priv->gtt.stolen_usable_size; /* HACK: This code depends on what we will do in *_enable_fbc. If that * code changes, this code needs to change as well. @@ -560,7 +570,8 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, */ /* Try to over-allocate to reduce reallocations and fragmentation. */ - ret = i915_gem_stolen_insert_node(dev_priv, node, size <<= 1, 4096); + ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1, + 4096, 0, end); if (ret == 0) return compression_threshold; @@ -570,7 +581,8 @@ again: (fb_cpp == 2 && compression_threshold == 2)) return 0; - ret = i915_gem_stolen_insert_node(dev_priv, node, size >>= 1, 4096); + ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1, + 4096, 0, end); if (ret && INTEL_INFO(dev_priv)->gen <= 4) { return 0; } else if (ret) { -- cgit v1.2.3 From 89e3e1427629027dc33e576fc002880a02a7e50c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 21 Sep 2015 10:45:34 +0100 Subject: drm/i915: Support NV12 in rotated GGTT mapping Just adding the rotated UV plane at the end of the rotated Y plane. v2: Rebase. Signed-off-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 37 ++++++++++++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +++ drivers/gpu/drm/i915/intel_display.c | 12 ++++++++++++ 3 files changed, 46 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2a8f64dc5096..ad0e7e03a91c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3282,10 +3282,13 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, { struct intel_rotation_info *rot_info = &ggtt_view->rotation_info; unsigned int size_pages = rot_info->size >> PAGE_SHIFT; + unsigned int size_pages_uv; struct sg_page_iter sg_iter; unsigned long i; dma_addr_t *page_addr_list; struct sg_table *st; + unsigned int uv_start_page; + struct scatterlist *sg; int ret = -ENOMEM; /* Allocate a temporary list of source pages for random access. */ @@ -3294,12 +3297,18 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, if (!page_addr_list) return ERR_PTR(ret); + /* Account for UV plane with NV12. */ + if (rot_info->pixel_format == DRM_FORMAT_NV12) + size_pages_uv = rot_info->size_uv >> PAGE_SHIFT; + else + size_pages_uv = 0; + /* Allocate target SG list. */ st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) goto err_st_alloc; - ret = sg_alloc_table(st, size_pages, GFP_KERNEL); + ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); if (ret) goto err_sg_alloc; @@ -3311,15 +3320,30 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, } /* Rotate the pages. */ - rotate_pages(page_addr_list, 0, + sg = rotate_pages(page_addr_list, 0, rot_info->width_pages, rot_info->height_pages, st, NULL); + /* Append the UV plane if NV12. */ + if (rot_info->pixel_format == DRM_FORMAT_NV12) { + uv_start_page = size_pages; + + /* Check for tile-row un-alignment. */ + if (offset_in_page(rot_info->uv_offset)) + uv_start_page--; + + rotate_pages(page_addr_list, uv_start_page, + rot_info->width_pages_uv, + rot_info->height_pages_uv, + st, sg); + } + DRM_DEBUG_KMS( - "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages).\n", + "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n", obj->base.size, rot_info->pitch, rot_info->height, rot_info->pixel_format, rot_info->width_pages, - rot_info->height_pages, size_pages); + rot_info->height_pages, size_pages + size_pages_uv, + size_pages); drm_free_large(page_addr_list); @@ -3331,10 +3355,11 @@ err_st_alloc: drm_free_large(page_addr_list); DRM_DEBUG_KMS( - "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages)\n", + "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n", obj->base.size, ret, rot_info->pitch, rot_info->height, rot_info->pixel_format, rot_info->width_pages, - rot_info->height_pages, size_pages); + rot_info->height_pages, size_pages + size_pages_uv, + size_pages); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 96ebb98a476d..c80758628899 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -138,10 +138,13 @@ enum i915_ggtt_view_type { struct intel_rotation_info { unsigned int height; unsigned int pitch; + unsigned int uv_offset; uint32_t pixel_format; uint64_t fb_modifier; unsigned int width_pages, height_pages; uint64_t size; + unsigned int width_pages_uv, height_pages_uv; + uint64_t size_uv; }; struct i915_ggtt_view { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1f3af2404dca..bdce4588c7b5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2263,6 +2263,7 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb, info->height = fb->height; info->pixel_format = fb->pixel_format; info->pitch = fb->pitches[0]; + info->uv_offset = fb->offsets[1]; info->fb_modifier = fb->modifier[0]; tile_height = intel_tile_height(fb->dev, fb->pixel_format, @@ -2272,6 +2273,17 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb, info->height_pages = DIV_ROUND_UP(fb->height, tile_height); info->size = info->width_pages * info->height_pages * PAGE_SIZE; + if (info->pixel_format == DRM_FORMAT_NV12) { + tile_height = intel_tile_height(fb->dev, fb->pixel_format, + fb->modifier[0], 1); + tile_pitch = PAGE_SIZE / tile_height; + info->width_pages_uv = DIV_ROUND_UP(fb->pitches[0], tile_pitch); + info->height_pages_uv = DIV_ROUND_UP(fb->height / 2, + tile_height); + info->size_uv = info->width_pages_uv * info->height_pages_uv * + PAGE_SIZE; + } + return 0; } -- cgit v1.2.3 From dedf278ce69cac6b29c324d280183da31093e0b0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 21 Sep 2015 10:45:35 +0100 Subject: drm/i915: Enable querying offset of UV plane with intel_plane_obj_offset v2: Rebase. Signed-off-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/intel_display.c | 26 +++++++++++++++++++++----- drivers/gpu/drm/i915/intel_drv.h | 4 +++- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 5 files changed, 28 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad0e7e03a91c..bbb96a41c57b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3332,6 +3332,8 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, if (offset_in_page(rot_info->uv_offset)) uv_start_page--; + rot_info->uv_start_page = uv_start_page; + rotate_pages(page_addr_list, uv_start_page, rot_info->width_pages_uv, rot_info->height_pages_uv, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c80758628899..9fbb07d6eaad 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -145,6 +145,7 @@ struct intel_rotation_info { uint64_t size; unsigned int width_pages_uv, height_pages_uv; uint64_t size_uv; + unsigned int uv_start_page; }; struct i915_ggtt_view { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bdce4588c7b5..9b1989166dbc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2897,14 +2897,29 @@ u32 intel_fb_stride_alignment(struct drm_device *dev, uint64_t fb_modifier, } unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj) + struct drm_i915_gem_object *obj, + unsigned int plane) { const struct i915_ggtt_view *view = &i915_ggtt_view_normal; + struct i915_vma *vma; + unsigned char *offset; if (intel_rotation_90_or_270(intel_plane->base.state->rotation)) view = &i915_ggtt_view_rotated; - return i915_gem_obj_ggtt_offset_view(obj, view); + vma = i915_gem_obj_to_ggtt_view(obj, view); + if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", + view->type)) + return -1; + + offset = (unsigned char *)vma->node.start; + + if (plane == 1) { + offset += vma->ggtt_view.rotation_info.uv_start_page * + PAGE_SIZE; + } + + return (unsigned long)offset; } static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) @@ -3060,7 +3075,7 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc, obj = intel_fb_obj(fb); stride_div = intel_fb_stride_alignment(dev, fb->modifier[0], fb->pixel_format); - surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj); + surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); /* * FIXME: intel_plane_state->src, dst aren't set when transitional @@ -11423,8 +11438,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (ret) goto cleanup_pending; - work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), obj) - + intel_crtc->dspaddr_offset; + work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), + obj, 0); + work->gtt_offset += intel_crtc->dspaddr_offset; if (mmio_flip) { ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 858c3821cdf7..bc674aac59cb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1170,7 +1170,9 @@ int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj); + struct drm_i915_gem_object *obj, + unsigned int plane); + u32 skl_plane_ctl_format(uint32_t pixel_format); u32 skl_plane_ctl_tiling(uint64_t fb_modifier); u32 skl_plane_ctl_rotation(unsigned int rotation); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 4372fa0b1ec5..4349fde4b72c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -235,7 +235,7 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc, else if (key->flags & I915_SET_COLORKEY_SOURCE) plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; - surf_addr = intel_plane_obj_offset(intel_plane, obj); + surf_addr = intel_plane_obj_offset(intel_plane, obj, 0); if (intel_rotation_90_or_270(rotation)) { /* stride: Surface height in tiles */ -- cgit v1.2.3 From 24dfd0736c9fc01d096e5760c656032b5a07e962 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 2 Oct 2015 14:16:53 +0100 Subject: drm/i915: prevent out of range pt in the PDE macros (take 3) We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of range pt in gen6_for_each_pde"). But the static analyzer still complains that, just before we break due to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an iter value that is bigger than I915_PDES. Of course, this isn't really a problem since no one uses pt outside the macro. Still, every single new usage of the macro will create a new issue for us to mark as a false positive. Also, Paulo re-started the discussion a while ago [1], but didn't end up implemented. In order to "solve" this "problem", this patch takes the ideas from Chris and Dave, but that check would change the desired behavior of the code, because the object (for example pdp->page_directory[iter]) can be null during init/alloc, and C would take this as false, breaking the for loop immediately. This has been already verified with "static analysis tools". [1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html v2: Make it a single statement, while preventing the common subexpression elimination (Chris) Cc: Paulo Zanoni Cc: Chris Wilson Cc: Dave Gordon Signed-off-by: Michel Thierry Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9fbb07d6eaad..a216397ead52 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -394,7 +394,8 @@ struct i915_hw_ppgtt { */ #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \ for (iter = gen6_pde_index(start); \ - pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \ + length > 0 && iter < I915_PDES ? \ + (pt = (pd)->page_table[iter]), 1 : 0; \ iter++, \ temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \ temp = min_t(unsigned, temp, length), \ @@ -459,7 +460,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr) */ #define gen8_for_each_pde(pt, pd, start, length, temp, iter) \ for (iter = gen8_pde_index(start); \ - pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \ + length > 0 && iter < I915_PDES ? \ + (pt = (pd)->page_table[iter]), 1 : 0; \ iter++, \ temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start, \ temp = min(temp, length), \ @@ -467,8 +469,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr) #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ for (iter = gen8_pdpe_index(start); \ - pd = (pdp)->page_directory[iter], \ - length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \ + length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \ + (pd = (pdp)->page_directory[iter]), 1 : 0; \ iter++, \ temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start, \ temp = min(temp, length), \ @@ -476,8 +478,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr) #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ for (iter = gen8_pml4e_index(start); \ - pdp = (pml4)->pdps[iter], \ - length > 0 && iter < GEN8_PML4ES_PER_PML4; \ + length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \ + (pdp = (pml4)->pdps[iter]), 1 : 0; \ iter++, \ temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ temp = min(temp, length), \ -- cgit v1.2.3