summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_gtt.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c420
1 files changed, 367 insertions, 53 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index df470b5e8d36..2c150dee78a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -28,19 +28,67 @@
#include "i915_trace.h"
#include "intel_drv.h"
+typedef uint32_t gtt_pte_t;
+
+/* PPGTT stuff */
+#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
+
+#define GEN6_PDE_VALID (1 << 0)
+/* gen6+ has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+
+#define GEN6_PTE_VALID (1 << 0)
+#define GEN6_PTE_UNCACHED (1 << 1)
+#define HSW_PTE_UNCACHED (0)
+#define GEN6_PTE_CACHE_LLC (2 << 1)
+#define GEN6_PTE_CACHE_LLC_MLC (3 << 1)
+#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+
+static inline gtt_pte_t pte_encode(struct drm_device *dev,
+ dma_addr_t addr,
+ enum i915_cache_level level)
+{
+ gtt_pte_t pte = GEN6_PTE_VALID;
+ pte |= GEN6_PTE_ADDR_ENCODE(addr);
+
+ switch (level) {
+ case I915_CACHE_LLC_MLC:
+ /* Haswell doesn't set L3 this way */
+ if (IS_HASWELL(dev))
+ pte |= GEN6_PTE_CACHE_LLC;
+ else
+ pte |= GEN6_PTE_CACHE_LLC_MLC;
+ break;
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ if (IS_HASWELL(dev))
+ pte |= HSW_PTE_UNCACHED;
+ else
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ BUG();
+ }
+
+
+ return pte;
+}
+
/* PPGTT support for Sandybdrige/Gen6 and later */
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
unsigned first_entry,
unsigned num_entries)
{
- uint32_t *pt_vaddr;
- uint32_t scratch_pte;
+ gtt_pte_t *pt_vaddr;
+ gtt_pte_t scratch_pte;
unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
unsigned last_pte, i;
- scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
- scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;
+ scratch_pte = pte_encode(ppgtt->dev, ppgtt->scratch_page_dma_addr,
+ I915_CACHE_LLC);
while (num_entries) {
last_pte = first_pte + num_entries;
@@ -77,6 +125,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
if (!ppgtt)
return ret;
+ ppgtt->dev = dev;
ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
GFP_KERNEL);
@@ -118,7 +167,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
i915_ppgtt_clear_range(ppgtt, 0,
ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
- ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(uint32_t);
+ ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
dev_priv->mm.aliasing_ppgtt = ppgtt;
@@ -168,9 +217,9 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
const struct sg_table *pages,
unsigned first_entry,
- uint32_t pte_flags)
+ enum i915_cache_level cache_level)
{
- uint32_t *pt_vaddr, pte;
+ gtt_pte_t *pt_vaddr;
unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
unsigned i, j, m, segment_len;
@@ -188,8 +237,8 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
- pte = GEN6_PTE_ADDR_ENCODE(page_addr);
- pt_vaddr[j] = pte | pte_flags;
+ pt_vaddr[j] = pte_encode(ppgtt->dev, page_addr,
+ cache_level);
/* grab the next page */
if (++m == segment_len) {
@@ -213,29 +262,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
- uint32_t pte_flags = GEN6_PTE_VALID;
-
- switch (cache_level) {
- case I915_CACHE_LLC_MLC:
- pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
- break;
- case I915_CACHE_LLC:
- pte_flags |= GEN6_PTE_CACHE_LLC;
- break;
- case I915_CACHE_NONE:
- if (IS_HASWELL(obj->base.dev))
- pte_flags |= HSW_PTE_UNCACHED;
- else
- pte_flags |= GEN6_PTE_UNCACHED;
- break;
- default:
- BUG();
- }
-
i915_ppgtt_insert_sg_entries(ppgtt,
obj->pages,
obj->gtt_space->start >> PAGE_SHIFT,
- pte_flags);
+ cache_level);
}
void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
@@ -246,23 +276,65 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
obj->base.size >> PAGE_SHIFT);
}
-/* XXX kill agp_type! */
-static unsigned int cache_level_to_agp_type(struct drm_device *dev,
- enum i915_cache_level cache_level)
+void i915_gem_init_ppgtt(struct drm_device *dev)
{
- switch (cache_level) {
- case I915_CACHE_LLC_MLC:
- if (INTEL_INFO(dev)->gen >= 6)
- return AGP_USER_CACHED_MEMORY_LLC_MLC;
- /* Older chipsets do not have this extra level of CPU
- * cacheing, so fallthrough and request the PTE simply
- * as cached.
- */
- case I915_CACHE_LLC:
- return AGP_USER_CACHED_MEMORY;
- default:
- case I915_CACHE_NONE:
- return AGP_USER_MEMORY;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t pd_offset;
+ struct intel_ring_buffer *ring;
+ struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+ uint32_t __iomem *pd_addr;
+ uint32_t pd_entry;
+ int i;
+
+ if (!dev_priv->mm.aliasing_ppgtt)
+ return;
+
+
+ pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
+ for (i = 0; i < ppgtt->num_pd_entries; i++) {
+ dma_addr_t pt_addr;
+
+ if (dev_priv->mm.gtt->needs_dmar)
+ pt_addr = ppgtt->pt_dma_addr[i];
+ else
+ pt_addr = page_to_phys(ppgtt->pt_pages[i]);
+
+ pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
+ pd_entry |= GEN6_PDE_VALID;
+
+ writel(pd_entry, pd_addr + i);
+ }
+ readl(pd_addr);
+
+ pd_offset = ppgtt->pd_offset;
+ pd_offset /= 64; /* in cachelines */
+ pd_offset <<= 16;
+
+ if (INTEL_INFO(dev)->gen == 6) {
+ uint32_t ecochk, gab_ctl, ecobits;
+
+ ecobits = I915_READ(GAC_ECO_BITS);
+ I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
+
+ gab_ctl = I915_READ(GAB_CTL);
+ I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+ ecochk = I915_READ(GAM_ECOCHK);
+ I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
+ ECOCHK_PPGTT_CACHE64B);
+ I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ } else if (INTEL_INFO(dev)->gen >= 7) {
+ I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
+ /* GFX_MODE is per-ring on gen7+ */
+ }
+
+ for_each_ring(ring, dev_priv, i) {
+ if (INTEL_INFO(dev)->gen >= 7)
+ I915_WRITE(RING_MODE_GEN7(ring),
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+ I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+ I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
}
}
@@ -288,13 +360,40 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
dev_priv->mm.interruptible = interruptible;
}
+
+static void i915_ggtt_clear_range(struct drm_device *dev,
+ unsigned first_entry,
+ unsigned num_entries)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ gtt_pte_t scratch_pte;
+ gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
+ const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+ int i;
+
+ if (INTEL_INFO(dev)->gen < 6) {
+ intel_gtt_clear_range(first_entry, num_entries);
+ return;
+ }
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
+ for (i = 0; i < num_entries; i++)
+ iowrite32(scratch_pte, &gtt_base[i]);
+ readl(gtt_base);
+}
+
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj;
/* First fill our portion of the GTT with scratch pages */
- intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
+ i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE,
(dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
@@ -302,7 +401,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
i915_gem_gtt_bind_object(obj, obj->cache_level);
}
- intel_gtt_chipset_flush();
+ i915_gem_chipset_flush(dev);
}
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
@@ -318,21 +417,76 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
return 0;
}
+/*
+ * Binds an object into the global gtt with the specified cache level. The object
+ * will be accessible to the GPU via commands whose operands reference offsets
+ * within the global GTT as well as accessible by the GPU through the GMADR
+ * mapped BAR (dev_priv->mm.gtt->gtt).
+ */
+static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
+ enum i915_cache_level level)
+{
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct sg_table *st = obj->pages;
+ struct scatterlist *sg = st->sgl;
+ const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
+ const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+ gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
+ int unused, i = 0;
+ unsigned int len, m = 0;
+ dma_addr_t addr;
+
+ for_each_sg(st->sgl, sg, st->nents, unused) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ for (m = 0; m < len; m++) {
+ addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+ iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
+ i++;
+ }
+ }
+
+ BUG_ON(i > max_entries);
+ BUG_ON(i != obj->base.size / PAGE_SIZE);
+
+ /* XXX: This serves as a posting read to make sure that the PTE has
+ * actually been updated. There is some concern that, even though
+ * registers and PTEs are within the same BAR, they may be subject to
+ * NUMA-like access patterns. Therefore, even with the way we assume
+ * hardware should work, we must keep this posting read for paranoia.
+ */
+ if (i != 0)
+ WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));
+
+ /* This next bit makes the above posting read even more important. We
+ * want to flush the TLBs only after we're certain all the PTE updates
+ * have finished.
+ */
+ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
struct drm_device *dev = obj->base.dev;
- unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
+ if (INTEL_INFO(dev)->gen < 6) {
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+ intel_gtt_insert_sg_entries(obj->pages,
+ obj->gtt_space->start >> PAGE_SHIFT,
+ flags);
+ } else {
+ gen6_ggtt_bind_object(obj, cache_level);
+ }
- intel_gtt_insert_sg_entries(obj->pages,
- obj->gtt_space->start >> PAGE_SHIFT,
- agp_type);
obj->has_global_gtt_mapping = 1;
}
void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
- intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
+ i915_ggtt_clear_range(obj->base.dev,
+ obj->gtt_space->start >> PAGE_SHIFT,
obj->base.size >> PAGE_SHIFT);
obj->has_global_gtt_mapping = 0;
@@ -390,5 +544,165 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
/* ... but ensure that we clear the entire range. */
- intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+ i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE);
+}
+
+static int setup_scratch_page(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct page *page;
+ dma_addr_t dma_addr;
+
+ page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+ if (page == NULL)
+ return -ENOMEM;
+ get_page(page);
+ set_pages_uc(page, 1);
+
+#ifdef CONFIG_INTEL_IOMMU
+ dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(dev->pdev, dma_addr))
+ return -EINVAL;
+#else
+ dma_addr = page_to_phys(page);
+#endif
+ dev_priv->mm.gtt->scratch_page = page;
+ dev_priv->mm.gtt->scratch_page_dma = dma_addr;
+
+ return 0;
+}
+
+static void teardown_scratch_page(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ set_pages_wb(dev_priv->mm.gtt->scratch_page, 1);
+ pci_unmap_page(dev->pdev, dev_priv->mm.gtt->scratch_page_dma,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ put_page(dev_priv->mm.gtt->scratch_page);
+ __free_page(dev_priv->mm.gtt->scratch_page);
+}
+
+static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+ snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+ snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+ return snb_gmch_ctl << 20;
+}
+
+static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl)
+{
+ snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
+ snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
+ return snb_gmch_ctl << 25; /* 32 MB units */
+}
+
+static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
+{
+ static const int stolen_decoder[] = {
+ 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352};
+ snb_gmch_ctl >>= IVB_GMCH_GMS_SHIFT;
+ snb_gmch_ctl &= IVB_GMCH_GMS_MASK;
+ return stolen_decoder[snb_gmch_ctl] << 20;
+}
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ phys_addr_t gtt_bus_addr;
+ u16 snb_gmch_ctl;
+ int ret;
+
+ /* On modern platforms we need not worry ourselves with the legacy
+ * host bridge query stuff. Skip it entirely.
+ */
+ if (INTEL_INFO(dev)->gen < 6) {
+ ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
+ if (!ret) {
+ DRM_ERROR("failed to set up gmch\n");
+ return -EIO;
+ }
+
+ dev_priv->mm.gtt = intel_gtt_get();
+ if (!dev_priv->mm.gtt) {
+ DRM_ERROR("Failed to initialize GTT\n");
+ intel_gmch_remove();
+ return -ENODEV;
+ }
+ return 0;
+ }
+
+ dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+ if (!dev_priv->mm.gtt)
+ return -ENOMEM;
+
+ if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
+ pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
+
+#ifdef CONFIG_INTEL_IOMMU
+ dev_priv->mm.gtt->needs_dmar = 1;
+#endif
+
+ /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
+ gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
+ dev_priv->mm.gtt->gma_bus_addr = pci_resource_start(dev->pdev, 2);
+
+ /* i9xx_setup */
+ pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+ dev_priv->mm.gtt->gtt_total_entries =
+ gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
+ if (INTEL_INFO(dev)->gen < 7)
+ dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
+ else
+ dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+
+ dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT;
+ /* 64/512MB is the current min/max we actually know of, but this is just a
+ * coarse sanity check.
+ */
+ if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 ||
+ dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) {
+ DRM_ERROR("Unknown GMADR entries (%d)\n",
+ dev_priv->mm.gtt->gtt_mappable_entries);
+ ret = -ENXIO;
+ goto err_out;
+ }
+
+ ret = setup_scratch_page(dev);
+ if (ret) {
+ DRM_ERROR("Scratch setup failed\n");
+ goto err_out;
+ }
+
+ dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
+ dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
+ if (!dev_priv->mm.gtt->gtt) {
+ DRM_ERROR("Failed to map the gtt page table\n");
+ teardown_scratch_page(dev);
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+ DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
+ DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
+ DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+
+ return 0;
+
+err_out:
+ kfree(dev_priv->mm.gtt);
+ if (INTEL_INFO(dev)->gen < 6)
+ intel_gmch_remove();
+ return ret;
+}
+
+void i915_gem_gtt_fini(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ iounmap(dev_priv->mm.gtt->gtt);
+ teardown_scratch_page(dev);
+ if (INTEL_INFO(dev)->gen < 6)
+ intel_gmch_remove();
+ kfree(dev_priv->mm.gtt);
}