author     Ben Widawsky <benjamin.widawsky@intel.com>    2014-02-05 12:39:48 +0000
committer  Ben Widawsky <benjamin.widawsky@intel.com>    2014-02-08 13:14:52 -0800
commit     05be33119a0366676826d2765d6ff88a882e007a (patch)
tree       43b5552c4daf408a42385b89bc3e90e5547a5d1a
parent     47a8cbcb63518cb007d1b291a0d9168fe4aed7a2 (diff)
drm/i915: Naive transparent big page support
While it is technically possible to promote to big pages at this point, the constraints (2MB aligned, 2MB contiguous) are extremely unlikely to be satisfied. An object allocated out of stolen memory (which, as of this writing, is not possible for a PPGTT-bound object) could potentially satisfy the condition. As a result, one could consider this an infrastructure patch even though it does potentially add functionality.

NOTE: The bitmap is entirely useless for anything but debug at this point. Since it does offer a nice way to WARN (without unnecessarily reading MMIO), and we will need the bitmap eventually, I like having it introduced early.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
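[Editor's illustration] To make the NOTE about the bitmap concrete, here is a minimal, self-contained sketch of the idea: tracking promoted PDEs in a software bitmap lets the driver catch a double promotion without reading the page directory back through MMIO. This is not the driver code; NUM_PDES, promoted_map and mark_range_promoted are illustrative names only.

#include <stdbool.h>
#include <stdio.h>

#define NUM_PDES 512                          /* one bit per page directory entry */
#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define BITMAP_LONGS ((NUM_PDES + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Software-side record of which PDEs currently use big pages. */
static unsigned long promoted_map[BITMAP_LONGS];

static bool pde_is_promoted(int pde)
{
	return promoted_map[pde / BITS_PER_LONG] & (1UL << (pde % BITS_PER_LONG));
}

static void pde_mark_promoted(int pde)
{
	promoted_map[pde / BITS_PER_LONG] |= 1UL << (pde % BITS_PER_LONG);
}

/*
 * Mark [first_pde, first_pde + num_pde) as promoted. An overlap with an
 * already-promoted range is detected purely from the bitmap, with no MMIO
 * read of the page directory (the driver WARNs in that case).
 */
static bool mark_range_promoted(int first_pde, int num_pde)
{
	int i;

	for (i = first_pde; i < first_pde + num_pde; i++)
		if (pde_is_promoted(i))
			return false;

	for (i = first_pde; i < first_pde + num_pde; i++)
		pde_mark_promoted(i);
	return true;
}

int main(void)
{
	printf("first promotion ok: %d\n", mark_range_promoted(4, 2));   /* prints 1 */
	printf("overlap detected:   %d\n", !mark_range_promoted(5, 1));  /* prints 1 */
	return 0;
}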
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h     |   8
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 107
2 files changed, 115 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1edaa41f9eae..9b24530a9770 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -584,6 +584,8 @@ struct i915_vma {
unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
+ unsigned int big_page_promoted:1;
+
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, pin_ioctl
@@ -707,10 +709,16 @@ struct i915_hw_ppgtt {
dma_addr_t *gen8_pt_dma_addr[4];
};
+ unsigned long *big_pde_map;
+
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct intel_ring_buffer *ring,
bool synchronous);
+ /* Promote a VMA to use big pages if possible. Returns true if done. */
+ bool (*promote)(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma);
+ void (*demote)(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma);
+
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
};
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4b369a19ba7d..65262d40a4e2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,11 @@
#define GEN6_PPGTT_PD_ENTRIES 512
#define GEN6_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+#define GEN7_BIG_PAGE_SIZE (2 << 20) /* 2MB big pages, per the constraints above */
+#define GEN6_PDE_MAP_SIZE (PAGE_SIZE * GEN6_PTES_PER_PAGE)
+#define GEN7_BIG_PAGES_PER_PT (GEN6_PDE_MAP_SIZE / GEN7_BIG_PAGE_SIZE)
+#define GEN7_BIG_PAGES \
+ (GEN6_PPGTT_PD_ENTRIES * GEN6_PDE_MAP_SIZE / GEN7_BIG_PAGE_SIZE)
+
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
@@ -38,6 +43,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
+#define GEN7_PDE_BIG (1 << 1)
#define GEN6_PDE_VALID (1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
@@ -76,6 +82,7 @@ static void ppgtt_bind_vma(struct i915_vma *vma,
u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
+static void gen7_demote(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma);
static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
@@ -659,6 +666,56 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
}
+static bool is_potential_big_page(struct i915_vma *vma)
+{
+ /* Big pages are currently unsupported on gen8 */
+ if (INTEL_INFO(vma->vm->dev)->gen > 7)
+ return false;
+
+ /* GEN7 big pages must be naturally aligned */
+ if (vma->node.start % GEN7_BIG_PAGE_SIZE)
+ return false;
+
+ /* Early code won't do any rounding of the allocation */
+ if (vma->node.size % GEN7_BIG_PAGE_SIZE ||
+ vma->node.size % GEN6_PDE_MAP_SIZE)
+ return false;
+
+ return true;
+}
+
+static bool gen7_promote(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma)
+{
+ struct drm_device *dev = ppgtt->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ const int first_pde = vma->node.start / GEN6_PDE_MAP_SIZE;
+ const int num_pde = vma->node.size / GEN6_PDE_MAP_SIZE;
+ gen6_gtt_pte_t __iomem *pd_addr =
+ (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
+ DECLARE_BITMAP(tmp, GEN6_PPGTT_PD_ENTRIES);
+ int i;
+
+ BUG_ON(i915_is_ggtt(vma->vm));
+
+ if (!is_potential_big_page(vma))
+ return false;
+
+ bitmap_zero(tmp, GEN6_PPGTT_PD_ENTRIES);
+ bitmap_set(tmp, first_pde, num_pde);
+
+ /* The PDEs are already using big pages - this can't happen yet */
+ if (WARN_ON(bitmap_intersects(tmp, ppgtt->big_pde_map, GEN6_PPGTT_PD_ENTRIES)))
+ return true;
+
+ for (i = 0; i < num_pde; i++) {
+ u32 pd_entry = readl(pd_addr + first_pde + i);
+ writel(pd_entry | GEN7_PDE_BIG, pd_addr + first_pde + i);
+ DRM_DEBUG_DRIVER("Promoting PDE %d to big pages\n", first_pde);
+ }
+
+ bitmap_or(ppgtt->big_pde_map, ppgtt->big_pde_map, tmp, GEN6_PPGTT_PD_ENTRIES);
+ vma->big_page_promoted = 1;
+ return true;
+}
+
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
struct drm_device *dev = ppgtt->base.dev;
@@ -838,6 +895,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
4096, PCI_DMA_BIDIRECTIONAL);
}
+ kfree(ppgtt->big_pde_map);
kfree(ppgtt->pt_dma_addr);
for (i = 0; i < ppgtt->num_pd_entries; i++)
__free_page(ppgtt->pt_pages[i]);
@@ -892,6 +950,18 @@ alloc:
ppgtt->switch_mm = gen7_mm_switch;
} else
BUG();
+
+ if (INTEL_INFO(dev)->gen > 6) {
+ ppgtt->big_pde_map = kcalloc(BITS_TO_LONGS(GEN6_PPGTT_PD_ENTRIES),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!ppgtt->big_pde_map) {
+ DRM_ERROR("Failed to allocate big page bitmap, big page support disabled\n");
+ } else {
+ ppgtt->promote = gen7_promote;
+ ppgtt->demote = gen7_demote;
+ }
+ }
+
ppgtt->base.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
@@ -984,6 +1054,8 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
gen6_write_pdes(ppgtt);
DRM_DEBUG("Adding PPGTT at offset %x\n",
ppgtt->pd_offset << 10);
+ if (ppgtt->big_pde_map)
+ DRM_DEBUG_DRIVER("Initialized big page support\n");
}
}
@@ -1147,6 +1219,13 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
int i915_gem_vm_prepare_vma(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_hw_ppgtt *ppgtt = NULL;
+
+ if (!i915_is_ggtt(vma->vm))
+ ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
+
+ if (ppgtt && ppgtt->promote)
+ ppgtt->promote(ppgtt, vma);
if (obj->has_dma_mapping)
return 0;
@@ -1392,12 +1471,40 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
}
}
+static void gen7_demote(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma)
+{
+ struct drm_device *dev = ppgtt->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ const int first_pde = vma->node.start / GEN6_PDE_MAP_SIZE;
+ const int num_pde = vma->node.size / GEN6_PDE_MAP_SIZE;
+ gen6_gtt_pte_t __iomem *pd_addr =
+ (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
+ int i;
+
+ if (!vma->big_page_promoted)
+ return;
+
+ for (i = 0; i < num_pde; i++) {
+ u32 pd_entry = readl(pd_addr + first_pde + i);
+ writel(pd_entry & ~GEN7_PDE_BIG, pd_addr + first_pde + i);
+ DRM_DEBUG_DRIVER("Demote PDE %d to small pages\n", first_pde);
+ }
+
+}
+
void i915_gem_vm_finish_vma(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
bool interruptible;
+ struct i915_hw_ppgtt *ppgtt = NULL;
+
+ if (!i915_is_ggtt(vma->vm))
+ ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
+
+ if (ppgtt && ppgtt->demote)
+ ppgtt->demote(ppgtt, vma);
interruptible = do_idling(dev_priv);