author	Chris Wilson <chris@chris-wilson.co.uk>	2011-08-31 23:21:54 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2011-08-31 23:58:39 +0100
commit	32fc0c896e0dfd06617c12beda1ccacedf69fb4a (patch)
tree	8045fd9c3edb9c4282e3f3717af53b3de4ce6a73
parent	5586dd729b153e37f942a285d328a07f3fe7ae16 (diff)
sna/gen6: Prefer the BLT ring, except for copies on behalf of DRI
As demonstrated by the all-important trap300, using the BLT is 2x faster than the RENDER ring for the simple case of solid fills. (Though note that performing the relocations costs 3x as much CPU for 2x GPU performance.) One case that may regress from this change is copywinpix, which should benefit from the batching in the RENDER commands and might warrant revisiting in the future (with realistic and synthetic benchmarks in hand!)

However, due to the forced stall when switching rings, we still want to perform RENDER copies on behalf of DRI clients and before page-flips.

Checking against cairo-perf-trace indicated no major impact -- I had worried that setting the BLT flag for some clears might have had a knock-on effect, causing too many operations that could be pipelined on the RENDER ring to be sent to the BLT ring instead.

Reported-by: Michael Larabel <Michael@phoronix.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--	src/sna/gen6_render.c	10
-rw-r--r--	src/sna/sna_dri.c	16
2 files changed, 22 insertions, 4 deletions
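
The policy described in the message reduces to a single test of the current batch mode. A self-contained, illustrative C sketch of that heuristic follows; the enum, struct, and prefer_blt() names here are stand-ins rather than the driver's actual types, and the real call sites (in the hunks below) test sna->kgem.mode against KGEM_RENDER directly.

#include <stdbool.h>

/* Illustrative stand-ins for the driver's batch modes (cf. KGEM_RENDER,
 * KGEM_BLT) and for struct kgem; not the actual SNA types. */
enum sketch_ring { SKETCH_NONE, SKETCH_RENDER, SKETCH_BLT };

struct sketch_kgem {
	enum sketch_ring mode;	/* ring the current batch is being built for */
};

/* Prefer the cheaper BLT ring for simple fills and copies unless the batch
 * is already committed to the RENDER ring, where switching away would force
 * a stall while the old ring drains and flushes its render cache. */
bool prefer_blt(const struct sketch_kgem *kgem)
{
	return kgem->mode != SKETCH_RENDER;
}

The inversion in the hunks below has the same effect: the BLT becomes the default for these simple operations, and only a batch already committed to the RENDER ring keeps the work there.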
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 5641b3ca..eb67fc67 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2307,7 +2307,8 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
 	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
 	     src_bo == dst_bo));
 
-	if (sna->kgem.mode == KGEM_BLT &&
+	/* XXX benchmark me! */
+	if (sna->kgem.mode != KGEM_RENDER &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
@@ -2464,7 +2465,8 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
 	     src->drawable.width, src->drawable.height,
 	     dst->drawable.width, dst->drawable.height));
 
-	if (sna->kgem.mode == KGEM_BLT &&
+	/* XXX benchmark me! */
+	if (sna->kgem.mode != KGEM_RENDER &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
@@ -2577,7 +2579,7 @@ gen6_render_fill_boxes(struct sna *sna,
 		return FALSE;
 	}
 
-	if (sna->kgem.mode == KGEM_BLT ||
+	if (sna->kgem.mode != KGEM_RENDER ||
 	    dst->drawable.width > 8192 ||
 	    dst->drawable.height > 8192 ||
 	    !gen6_check_dst_format(format)) {
@@ -2734,7 +2736,7 @@ gen6_render_fill(struct sna *sna, uint8_t alu,
 		     op);
 #endif
 
-	if (sna->kgem.mode == KGEM_BLT &&
+	if (sna->kgem.mode != KGEM_RENDER &&
 	    sna_blt_fill(sna, alu,
 			 dst_bo, dst->drawable.bitsPerPixel,
 			 color,
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 0a01f8af..f4049f19 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -461,6 +461,22 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
 		get_drawable_deltas(draw, dst, &dx, &dy);
 	}
 
+	if (sna->kgem.gen >= 60) {
+		/* Sandybridge introduced a separate ring which it uses to
+		 * perform blits. Switching rendering between rings incurs
+		 * a stall as we wait upon the old ring to finish and
+		 * flush its render cache before we can proceed on with
+		 * the operation on the new ring.
+		 *
+		 * As this buffer, we presume, has just been written to by
+		 * the DRI client using the RENDER ring, we want to perform
+		 * our operation on the same ring, and ideally on the same
+		 * ring as we will flip from (which should be the RENDER ring
+		 * as well).
+		 */
+		kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	}
+
 	if (region) {
 		boxes = REGION_RECTS(region);
 		n = REGION_NUM_RECTS(region);
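
The comment block in the hunk above supplies the other half of the policy: on gen6+ the DRI copy pins the batch to the RENDER ring before any boxes are emitted, so the BLT-preferring tests in gen6_render.c fail and the copy (and the flip that follows it) stays on the RENDER ring, avoiding the cross-ring stall. A minimal, runnable illustration of that interaction, again using stand-in names rather than the driver's API:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the driver's batch modes; not the actual KGEM_* enum. */
enum ring { RING_NONE, RING_RENDER, RING_BLT };

/* The gen6 fill/copy paths take the BLT unless the batch is already
 * committed to the RENDER ring. */
static bool use_blt(enum ring mode)
{
	return mode != RING_RENDER;
}

int main(void)
{
	enum ring mode = RING_NONE;

	/* Ordinary solid fill: nothing queued on RENDER yet, so the BLT wins. */
	printf("solid fill -> %s ring\n", use_blt(mode) ? "BLT" : "RENDER");

	/* DRI copy on gen6+: the sna_dri.c hunk pins the batch to RENDER
	 * first via kgem_set_mode(&sna->kgem, KGEM_RENDER), so the copy
	 * stays on the RENDER ring and the ring-switch stall is avoided. */
	mode = RING_RENDER;
	printf("DRI copy   -> %s ring\n", use_blt(mode) ? "BLT" : "RENDER");

	return 0;
}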