sna: Only use the blitter to emit wide spans if we cannot stream the updates

If either the region is busy on the gpu or if we need to read the destination then we would incur penalties for trying to perform the operation through the GTT. However, if we are simply streaming pixels to an unbusy bo then we can do so inplace faster than computing the corresponding GPU commands and uploading them. Note: currently it is universally slower to use the GPU here (the computation of the spans is too slow). However that is only according to micro-benchmarks, avoiding the readback is likely to be more efficient in practice. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-01-19 11:15:38 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2012-01-19 12:32:59 +0000
commit: 7ad4a0c9423ee7f4041173c428b07ac4af312fe1 (patch)
tree: a773ba5f8e516e8e384cd428569975d5e0fa905c
parent: 9db6b9fad808175184849f3030862ea115ef0708 (diff)
1 files changed, 69 insertions, 4 deletions
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4645c685..964ff65a 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -58,8 +58,8 @@
 #define FORCE_FALLBACK 0
 #define FORCE_FLUSH 0
 
-#define USE_SPANS 0
 #define USE_INPLACE 1
+#define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
 #define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
 
@@ -5334,6 +5334,70 @@ use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
 	return ret;
 }
 
+/* Only use our spans code if the destination is busy and we can't perform
+ * the operation in place.
+ *
+ * Currently it looks to be faster to use the CPU for wide spans on all
+ * platforms, slow MI code. But that does not take into account the true
+ * cost of readback?
+ */
+inline static bool
+_use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	PixmapPtr pixmap;
+	struct sna_pixmap *priv;
+	BoxRec area;
+	int16_t dx, dy;
+
+	if (USE_WIDE_SPANS)
+		return USE_WIDE_SPANS > 0;
+
+	if ((drawable_gc_flags(drawable, gc, false) & MOVE_INPLACE_HINT) == 0)
+		return TRUE;
+
+	/* XXX check for GPU stalls on the gc (stipple, tile, etc) */
+
+	pixmap = get_drawable_pixmap(drawable);
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL)
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->cpu_damage))
+		return FALSE;
+
+	if (priv->stride == 0 || priv->gpu_bo == NULL)
+		return FALSE;
+
+	if (!kgem_bo_is_busy(priv->gpu_bo))
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->gpu_damage))
+		return TRUE;
+
+	if (priv->gpu_damage == NULL)
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	area = *extents;
+	area.x1 += dx;
+	area.x2 += dx;
+	area.y1 += dy;
+	area.y2 += dy;
+	DBG(("%s extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+	     area.x1, area.y1, area.x2, area.y2));
+
+	return sna_damage_contains_box(priv->gpu_damage,
+				       &area) != PIXMAN_REGION_OUT;
+}
+
+static bool
+use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	bool ret = _use_wide_spans(drawable, gc, extents);
+	DBG(("%s? %d\n", __FUNCTION__, ret));
+	return ret;
+}
+
 static void
 sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	      int mode, int n, DDXPointPtr pt)
@@ -5505,7 +5569,7 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	}
 
 spans_fallback:
-	if (USE_SPANS &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		DBG(("%s: converting line into spans\n", __FUNCTION__));
 		switch (gc->lineStyle) {
@@ -6377,7 +6441,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 
 	/* XXX Do we really want to base this decision on the amalgam ? */
 spans_fallback:
-	if (USE_SPANS &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
 		int i;
@@ -7076,7 +7140,8 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
 		goto fallback;
 
 	/* For "simple" cases use the miPolyArc to spans path */
-	if (USE_SPANS && arc_to_spans(gc, n) &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
+	    arc_to_spans(gc, n) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		DBG(("%s: converting arcs into spans\n", __FUNCTION__));
 		/* XXX still around 10x slower for x11perf -ellipse */
author	Chris Wilson <chris@chris-wilson.co.uk>	2012-01-19 11:15:38 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2012-01-19 12:32:59 +0000
commit	7ad4a0c9423ee7f4041173c428b07ac4af312fe1 (patch)
tree	a773ba5f8e516e8e384cd428569975d5e0fa905c
parent	9db6b9fad808175184849f3030862ea115ef0708 (diff)