summaryrefslogtreecommitdiff
path: root/src/intel/compiler/brw_fs_nir.cpp
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2021-01-23 14:28:07 -0800
committerMarge Bot <eric+marge@anholt.net>2021-08-18 22:03:37 +0000
commit5ce3bfcdf3154a65c37386f908b3f053b7fd6a61 (patch)
tree0061831d9879dc06c4157dfae5139ded6c4ef9a2 /src/intel/compiler/brw_fs_nir.cpp
parentf0a8a9816afce4f30be64a8cdf7560a4282eb048 (diff)
intel/compiler: Lower 8-bit ops to 16-bit in NIR on all platforms
This fixes the Crucible func.shader.shift.int8_t test on Gen8 and Gen9. See https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76. With the previous optimizations in place, this change seems to improve the quality of the generated code. Comparing a couple Vulkan CTS tests on Skylake had the following results. dEQP-VK.spirv_assembly.type.vec3.i8.bitwise_xor_frag: SIMD8 shader: 36 instructions. 1 loops. 3822 cycles. 0:0 spills:fills, 5 sends SIMD8 shader: 27 instructions. 1 loops. 2742 cycles. 0:0 spills:fills, 5 sends dEQP-VK.spirv_assembly.type.vec3.i8.max_frag: SIMD8 shader: 39 instructions. 1 loops. 3922 cycles. 0:0 spills:fills, 5 sends SIMD8 shader: 37 instructions. 1 loops. 3682 cycles. 0:0 spills:fills, 5 sends Reviewed-by: Francisco Jerez <currojerez@riseup.net> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9025>
Diffstat (limited to 'src/intel/compiler/brw_fs_nir.cpp')
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp32
1 files changed, 30 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 53f347bf636..10d3f0eece2 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -993,8 +993,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
default:
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
- assert((devinfo->ver == 8 || devinfo->ver == 9) ||
- type_sz(op[i].type) > 1);
+ assert(type_sz(op[i].type) > 1);
}
}
#endif
@@ -1836,6 +1835,35 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_bitfield_insert:
unreachable("not reached: should have been lowered");
+ /* For all shift operations:
+ *
+ * Gen4 - Gen7: After application of source modifiers, the low 5-bits of
+ * src1 are used an unsigned value for the shift count.
+ *
+ * Gen8: As with earlier platforms, but for Q and UQ types on src0, the low
+ * 6-bit of src1 are used.
+ *
+ * Gen9+: The low bits of src1 matching the size of src0 (e.g., 4-bits for
+ * W or UW src0).
+ *
+ * The implication is that the following instruction will produce a
+ * different result on Gen9+ than on previous platforms:
+ *
+ * shr(8) g4<1>UW g12<8,8,1>UW 0x0010UW
+ *
+ * where Gen9+ will shift by zero, and earlier platforms will shift by 16.
+ *
+ * This does not seem to be the case. Experimentally, it has been
+ * determined that shifts of 16-bit values on Gen8 behave properly. Shifts
+ * of 8-bit values on both Gen8 and Gen9 do not. Gen11+ lowers 8-bit
+ * values, so those platforms were not tested. No features expose access
+ * to 8- or 16-bit types on Gen7 or earlier, so those platforms were not
+ * tested either. See
+ * https://gitlab.freedesktop.org/mesa/crucible/-/merge_requests/76.
+ *
+ * This is part of the reason 8-bit values are lowered to 16-bit on all
+ * platforms.
+ */
case nir_op_ishl:
bld.SHL(result, op[0], op[1]);
break;