diff options
| author | Matt Turner <mattst88@gmail.com> | 2015-08-04 22:54:14 -0700 |
|---|---|---|
| committer | Matt Turner <mattst88@gmail.com> | 2015-08-06 15:32:59 -0700 |
| commit | ff52fa12f197166c0410a1f5ef94851427d1c208 (patch) | |
| tree | 11aab9808532361471d460693d98070937cf703d | |
| parent | 10ea9f8171bbc66cb78855fae82ce7f2c25b6968 (diff) | |
glsl: Optimize packUnorm4x8. (branch: wip/packing)
Interestingly, this confirmed that i965's MAD instruction is a fused
multiply-add (i.e., that there is no intermediate rounding performed
between the multiply and the add). If nir_opt_peephole_ffma is allowed to
run, the MAD instructions evidently produce different values from
separate MUL/ADD instructions, which causes the piglit tests to fail.
Disabling nir_opt_peephole_ffma and using separate MUL/ADD instructions
lets the test pass.
| -rw-r--r-- | src/glsl/lower_packing_builtins.cpp | 6 | ||||
| -rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 2 |
3 files changed, 9 insertions, 1 deletion
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index a6fb8a8837e..aef8f5b83cf 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -619,9 +619,13 @@ private: assert(vec4_rval->type == glsl_type::vec4_type); +#if 0 ir_rvalue *result = pack_uvec4_to_uint( f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); - +#else + ir_rvalue *result = pack_uvec4_to_uint( + bit_and(bitcast_f2u(add(mul(saturate(vec4_rval), constant(255.0f)), constant(8388608.0f))), constant(0xffu))); +#endif assert(result->type == glsl_type::uint_type); return result; } diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index d7c17403f9f..60a1a9ace10 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -134,6 +134,8 @@ optimizations = [ (('iand', a, a), a), (('iand', a, ~0), a), (('iand', a, 0), 0), + (('iand', a, ('iand', a, b)), ('iand', a, b)), + (('ishl', ('iand', a, 255), 24), ('ishl', a, 24)), (('ior', a, a), a), (('ior', a, 0), a), (('fxor', a, a), 0.0), diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index b5788fa2e33..ab37fe0b5e7 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -145,11 +145,13 @@ brw_create_nir(struct brw_context *brw, nir_optimize(nir, is_scalar); +#if 0 if (brw->gen >= 6) { /* Try and fuse multiply-adds */ nir_opt_peephole_ffma(nir); nir_validate_shader(nir); } +#endif nir_opt_algebraic_late(nir); nir_validate_shader(nir); |
