summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2013-04-25 12:34:07 -0700
committer: Eric Anholt <eric@anholt.net> 2013-04-29 11:41:34 -0700
commit: 73bc6061f5c3b6a3bb7a8114bb2e1ab77d23cfdb (patch)
tree: 6cbfb1078fb529a8f0c8b03a536b2d0bcdf0df7a
parent: e409889213f3d3bf80c46898b3a48178d9c785f8 (diff)
i965: Disable Z16 on contexts that don't require it.
It appears that Z16 on Intel hardware is in fact slower than Z24, so people are getting surprisingly hurt when trying to use Z16 as a performance-versus-precision tradeoff, or when they're targeting GLES2 and that's all you get.

GL 3.0+ have Z16 on the list of required exact format sizes, but GLES doesn't, so choose the better-performing layout in that case.

Improves GLB 2.7 trex performance at 1920x1080 by 10.7% +/- 1.1% (n=3) on my IVB system.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c 15
1 files changed, 14 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index a74b2c7cc1e..f1976391b1a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -566,7 +566,20 @@ brw_init_surface_formats(struct brw_context *brw)
566 ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true; 566 ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
567 ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true; 567 ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true;
568 ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true; 568 ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true;
569 ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true; 569
570 /* It appears that Z16 is slower than Z24 (on Intel Ivybridge and newer
571 * hardware at least), so there's no real reason to prefer it unless you're
572 * under memory (not memory bandwidth) pressure. Our speculation is that
573 * this is due to either increased fragment shader execution from
574 * GL_LEQUAL/GL_EQUAL depth tests at the reduced precision, or due to
575 * increased depth stalls from a cacheline-based heuristic for detecting
576 * depth stalls.
577 *
578 * However, desktop GL 3.0+ require that you get exactly 16 bits when
579 * asking for DEPTH_COMPONENT16, so we have to respect that.
580 */
581 if (_mesa_is_desktop_gl(ctx))
582 ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true;
570 583
571 /* On hardware that lacks support for ETC1, we map ETC1 to RGBX 584 /* On hardware that lacks support for ETC1, we map ETC1 to RGBX
572 * during glCompressedTexImage2D(). See intel_mipmap_tree::wraps_etc1. 585 * during glCompressedTexImage2D(). See intel_mipmap_tree::wraps_etc1.