author Ben Widawsky <benjamin.widawsky@intel.com> 2014-02-05 17:50:16 +0000
committer Ben Widawsky <benjamin.widawsky@intel.com> 2014-02-08 13:14:53 -0800
commit f127fa05f1274a439defe3571602e17573256f43 (patch)
tree f43d12cfca3935b6a39d3926db9946c156716258
parent c4ef4a6b76cd6fde56d8002c0938cd817d326fa5 (diff)
drm/i915: GEM support of merging
TODO: fix commit message to say this is just infrastructure.

This should now round up allocations for doing big pages. With this patch we should actually start using big pages if we end up with a contiguous allocation for an object greater than 32K (on IVB, HSW, and VLV).

This commit could potentially regress a lot of things. I need to somehow mitigate this while adding support for it.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
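For context on the commit message: with 32K big pages, an object only qualifies if it is larger than one big page, and its reservation has to be rounded up to big-page granularity. The standalone, userspace-runnable sketch below just illustrates that rounding arithmetic; the two helper names are hypothetical and the 32K value is taken from the message, not from a header in this diff.

#include <stdint.h>
#include <stdio.h>

#define GEN7_BIG_PAGE_SIZE (32u * 1024u)        /* assumed 32K, per the commit message */

/* Round an object size up to big-page granularity. */
static uint64_t round_up_to_big_page(uint64_t size)
{
        return (size + GEN7_BIG_PAGE_SIZE - 1) & ~(uint64_t)(GEN7_BIG_PAGE_SIZE - 1);
}

/* Only objects larger than one big page are worth promoting at all. */
static int object_may_use_big_pages(uint64_t size)
{
        return size > GEN7_BIG_PAGE_SIZE;
}

int main(void)
{
        uint64_t obj_size = 40 * 1024;  /* a 40K object */

        if (object_may_use_big_pages(obj_size))
                printf("reserve %llu bytes (rounded up from %llu)\n",
                       (unsigned long long)round_up_to_big_page(obj_size),
                       (unsigned long long)obj_size);
        return 0;
}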
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h      30
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c      32
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c  62
3 files changed, 114 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 640117476011..3790fefa27a0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -556,6 +556,13 @@ enum i915_cache_level {
typedef uint32_t gen6_gtt_pte_t;
+struct i915_vma;
+struct i915_big_page {
+ struct kref ref;
+ struct drm_mm_node node;
+ struct i915_vma *vma;
+};
+
/**
* A VMA represents a GEM BO that is bound into an address space. Therefore, a
* VMA's presence cannot be guaranteed before binding, or after unbinding the
@@ -567,6 +574,7 @@ typedef uint32_t gen6_gtt_pte_t;
struct i915_vma {
struct drm_mm_node node;
struct drm_i915_gem_object *obj;
+ struct i915_big_page *big_page;
struct i915_address_space *vm;
/** This object's place on the active/inactive lists */
@@ -657,6 +665,12 @@ struct i915_address_space {
unsigned int first_entry,
enum i915_cache_level cache_level);
void (*cleanup)(struct i915_address_space *vm);
+ /* Determines whether the given VMA, with the given alignment, can use a
+ * big page. It returns a potentially new VMA for cases where we cannot
+ * merge into an existing big page. */
+ struct i915_vma * (*merge_or_promote_big_page)(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ unsigned alignment);
};
/* The Graphics Translation Table is the way in which GEN hardware translates a
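The struct i915_big_page introduced above embeds a kref, suggesting that several VMAs can share one big-page allocation. The sketch below shows one plausible shape of that lifetime handling; it is not part of the patch, the three helper names are hypothetical, and only struct i915_big_page and its fields come from the hunks above.

#include <linux/kref.h>
#include <linux/slab.h>
#include "i915_drv.h"

static void i915_big_page_release(struct kref *ref)
{
        struct i915_big_page *bp = container_of(ref, struct i915_big_page, ref);

        /* The shared drm_mm_node is only freed with the last VMA using it. */
        if (drm_mm_node_allocated(&bp->node))
                drm_mm_remove_node(&bp->node);
        kfree(bp);
}

static void i915_big_page_get(struct i915_big_page *bp)
{
        kref_get(&bp->ref);     /* taken when a VMA merges into the big page */
}

static void i915_big_page_put(struct i915_big_page *bp)
{
        kref_put(&bp->ref, i915_big_page_release);      /* dropped on unbind */
}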
@@ -2289,6 +2303,19 @@ vm_to_ppgtt_safe(struct i915_address_space *vm)
return ppgtt;
}
+static inline struct i915_hw_ppgtt *
+vm_to_full_ppgtt_safe(struct i915_address_space *vm)
+{
+ struct drm_i915_private *dev_priv = vm->dev->dev_private;
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ BUG_ON(i915_is_ggtt(vm));
+ BUG_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
+
+ return ppgtt;
+}
+
static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
{
return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
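vm_to_full_ppgtt_safe() is stricter than vm_to_ppgtt_safe(): it BUG()s on the GGTT and on the aliasing PPGTT, so callers must rule those out first. A minimal caller sketch follows, assuming a hypothetical helper name; every field it touches appears in the hunks above.

static bool vm_supports_big_pages(struct i915_address_space *vm)
{
        struct drm_i915_private *dev_priv = vm->dev->dev_private;

        /* Rule out the address spaces vm_to_full_ppgtt_safe() refuses. */
        if (i915_is_ggtt(vm) || vm == &dev_priv->mm.aliasing_ppgtt->base)
                return false;

        return vm_to_full_ppgtt_safe(vm)->base.merge_or_promote_big_page != NULL;
}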
@@ -2366,7 +2393,8 @@ int i915_gem_evict_everything(struct drm_device *dev);
void i915_check_and_clear_faults(struct drm_device *dev);
void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
void i915_gem_restore_gtt_mappings(struct drm_device *dev);
-int __must_check i915_gem_vm_prepare_vma(struct i915_vma *vma);
+int __must_check i915_gem_vm_prepare_vma(struct i915_vma *vma,
+ struct drm_mm_node *node);
void i915_gem_vm_finish_vma(struct i915_vma *vma);
void i915_gem_init_global_gtt(struct drm_device *dev);
void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af861ed4841d..ed982b6220a2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3222,7 +3222,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
size_t gtt_max =
map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
struct drm_mm_node *node;
- struct i915_vma *vma;
+ unsigned big_page_align = alignment;
+ bool big_page_attempt = false;
+ struct i915_vma *vma, *big_vma;
int ret;
fence_size = i915_gem_get_gtt_size(dev,
@@ -3269,12 +3271,25 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
goto err_unpin;
}
- node = &vma->node;
- if (WARN_ON(node->allocated))
- goto skip_alloc;
+ /* Do the software promotion to a big page. The actual PTEs are not
+ * updated until the drm_mm can satisfy the request. */
+ if (vm->merge_or_promote_big_page) {
+ big_vma = vm->merge_or_promote_big_page(vm, vma, alignment);
+ if (big_vma)
+ node = &big_vma->node;
+
+ if (big_vma && big_vma->node.allocated)
+ goto skip_alloc;
+ else
+ node = &vma->node;
+ } else {
+ node = &vma->node;
+ }
+
search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, node,
- size, alignment,
+ size,
+ big_page_attempt ? big_page_align : alignment,
obj->cache_level, 0, gtt_max,
DRM_MM_SEARCH_DEFAULT);
if (ret) {
@@ -3282,8 +3297,11 @@ search_free:
obj->cache_level,
map_and_fenceable,
nonblocking);
- if (ret == 0)
+ if (ret == 0 || big_page_attempt) {
+ big_page_attempt = false;
+ DRM_DEBUG("Failed to promote a potential big page\n");
goto search_free;
+ }
goto err_free_vma;
}
@@ -3294,7 +3312,7 @@ search_free:
}
skip_alloc:
- ret = i915_gem_vm_prepare_vma(vma);
+ ret = i915_gem_vm_prepare_vma(vma, node);
if (ret)
goto err_remove_node;
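The i915_gem.c changes above first ask the address space to merge or promote, then fall back to an ordinary allocation when the big-page attempt cannot be satisfied. The helper below is a condensed sketch of that fallback shape, reusing the same drm_mm call the patch already makes; the function name and retry policy are illustrative, not the driver's.

#include <drm/drm_mm.h>

static int insert_node_prefer_big_page(struct drm_mm *mm,
                                       struct drm_mm_node *node,
                                       unsigned long size,
                                       unsigned alignment,
                                       unsigned big_page_align,
                                       unsigned long color,
                                       unsigned long range_end)
{
        int ret;

        /* First try the stricter big-page alignment... */
        ret = drm_mm_insert_node_in_range_generic(mm, node, size,
                                                  big_page_align, color,
                                                  0, range_end,
                                                  DRM_MM_SEARCH_DEFAULT);
        if (ret == 0)
                return 0;

        /* ...then settle for the object's normal alignment. */
        return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
                                                   color, 0, range_end,
                                                   DRM_MM_SEARCH_DEFAULT);
}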
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2cfd691b32e5..ea809b803885 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -716,6 +716,58 @@ static bool gen7_promote(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma)
return true;
}
+/*
+ * Try to merge a VMA into an existing big page allocation. There are
+ * essentially 3 return values:
+ * 1. Page can be merged and there is already a big page owner.
+ * 2. Page can be promoted to big pages, but needs a new allocation
+ * 3. VMA is unsuitable for big page promotion
+ */
+static struct i915_big_page *gen7_big_page_merge(struct i915_hw_ppgtt *ppgtt,
+ struct i915_vma *vma,
+ unsigned alignment)
+{
+ BUG_ON(vma->big_page);
+ return ERR_PTR(-ENOSPC);
+}
+
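gen7_big_page_merge() is only a stub so far, but its comment promises three outcomes. The sketch below assumes those outcomes are encoded with the kernel's usual ERR_PTR() convention; the wrapper name and the exact error mapping are hypothetical, and only gen7_big_page_merge() itself comes from the patch.

#include <linux/err.h>

static struct i915_big_page *
gen7_lookup_big_page(struct i915_hw_ppgtt *ppgtt, struct i915_vma *vma,
                     unsigned alignment)
{
        struct i915_big_page *big_page;

        big_page = gen7_big_page_merge(ppgtt, vma, alignment);
        if (!IS_ERR(big_page))
                return big_page;        /* 1. merged into an existing big page */
        if (PTR_ERR(big_page) == -ENOSPC)
                return NULL;            /* 2. promotable, but needs a new allocation */
        return big_page;                /* 3. unsuitable for big pages: pass the error up */
}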
+struct i915_vma *gen7_merge_or_promote(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ unsigned alignment)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ /* number of big pages per page table: */
+ const int big_pages = (I915_PPGTT_PT_ENTRIES * PAGE_SIZE) / GEN7_BIG_PAGE_SIZE;
+ /* virtual memory mapped by a PDE */
+ const int big_pde_area = big_pages * GEN7_BIG_PAGE_SIZE;
+ struct i915_hw_ppgtt *ppgtt = vm_to_full_ppgtt_safe(vm);
+ struct i915_big_page *big_page;
+
+ /* Shortcut if we have a large object */
+ if (obj->base.size > big_pde_area) {
+ /* Natural alignment is required */
+ if (alignment & (GEN7_BIG_PAGE_SIZE - 1))
+ return NULL;
+
+ /* TODO: support objects which would span multiple PDEs.
+ * It's technically possible to do this, just more difficult to
+ * track in software. */
+ return NULL;
+ }
+
+ /* Though technically possible to mix big and small pages (if they
+ * straddle a PDE) we have no good reason to try to support it
+ */
+ if (obj->base.size % GEN7_BIG_PAGE_SIZE)
+ return NULL;
+
+ big_page = gen7_big_page_merge(ppgtt, vma, alignment);
+ if (!IS_ERR(big_page))
+ BUG(); /* Not supported yet */
+
+ return NULL;
+}
+
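The size checks in gen7_merge_or_promote() lean on how much address space one PDE covers and how many big pages fit inside it. The standalone program below works that arithmetic through, assuming 4K CPU pages, 1024 PTEs behind a gen7 PDE, and the 32K big-page size the commit message mentions; the macros are local stand-ins, not the driver's definitions.

#include <stdio.h>

#define SKETCH_PAGE_SIZE        4096u           /* assumption: 4K CPU pages */
#define SKETCH_PTES_PER_PT      1024u           /* assumption: PTEs behind one PDE */
#define SKETCH_BIG_PAGE_SIZE    (32u * 1024u)   /* assumption: 32K big pages */

int main(void)
{
        unsigned pde_area = SKETCH_PTES_PER_PT * SKETCH_PAGE_SIZE;
        unsigned big_pages = pde_area / SKETCH_BIG_PAGE_SIZE;
        unsigned obj = 40u * 1024u;             /* a 40K object as an example */

        printf("one PDE maps %u KiB, room for %u big pages\n",
               pde_area / 1024, big_pages);
        printf("a 40 KiB object %s promotable: size %% 32K = %u\n",
               obj % SKETCH_BIG_PAGE_SIZE ? "is not" : "is",
               obj % SKETCH_BIG_PAGE_SIZE);
        return 0;
}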
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
struct drm_device *dev = ppgtt->base.dev;
@@ -956,6 +1008,8 @@ alloc:
} else {
ppgtt->promote = gen7_promote;
ppgtt->demote = gen7_demote;
+ ppgtt->base.merge_or_promote_big_page =
+ gen7_merge_or_promote;
}
}
@@ -1213,7 +1267,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
i915_gem_chipset_flush(dev);
}
-int i915_gem_vm_prepare_vma(struct i915_vma *vma)
+int i915_gem_vm_prepare_vma(struct i915_vma *vma, struct drm_mm_node *node)
{
struct drm_i915_gem_object *obj = vma->obj;
struct i915_hw_ppgtt *ppgtt = NULL;
@@ -1221,8 +1275,12 @@ int i915_gem_vm_prepare_vma(struct i915_vma *vma)
if (!i915_is_ggtt(vma->vm))
ppgtt = vm_to_ppgtt_safe(vma->vm);
- if (ppgtt && ppgtt->promote)
+ if (ppgtt && ppgtt->promote && &vma->node != node) {
+ vma->node.start = node->start;
+ vma->node.size = node->size;
+ vma->node.allocated = 1;
ppgtt->promote(ppgtt, vma);
+ }
if (obj->has_dma_mapping)
return 0;
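With the new signature, i915_gem_vm_prepare_vma() receives the drm_mm_node that really holds the allocation (the big page's node when the VMA was merged) and mirrors that placement into vma->node before calling promote. The sketch below isolates that mirroring step; the helper name is hypothetical, and the fields it sets are the ones the hunk above touches.

#include "i915_drv.h"

static void i915_vma_mirror_node(struct i915_vma *vma,
                                 const struct drm_mm_node *owner)
{
        if (&vma->node == owner)
                return;                 /* the VMA owns its own allocation */

        /* vma->node is never inserted into the drm_mm here; it only shadows
         * the big page's node so code reading vma->node.start keeps working. */
        vma->node.start = owner->start;
        vma->node.size = owner->size;
        vma->node.allocated = 1;
}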