summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Iago Toral Quiroga <itoral@igalia.com> 2017-11-29 10:50:42 +0100
committer: Andres Gomez <agomez@igalia.com> 2017-12-20 19:40:38 +0200
commit: 5226e37717cf6342a0263444820fee1f8bf1d132 (patch)
tree: 7c42ea60060d1b7b119d562383051f329923c2a7
parent: a8ee7222629736ed553030a5bd1c33b37ba52157 (diff)
i965/vec4: use a temp register to compute offsets for pull loads
64-bit pull loads are implemented by emitting 2 separate 32-bit pull load messages, where the second message loads from an offset at +16B. That addition of 16B to the original offset should not alter the original offset register used as source for the pull load instruction though, since the compiler might use that same offset register in other instructions (for example, for other pull loads in the shader code that take that same offset as reference).

If the pull load is 32-bit then we only need to emit one message and we don't need to do offset calculations, but in that case the optimizer should be able to drop the redundant MOV.

Fixes the following test on Haswell:
KHR-GL45.gpu_shader_fp64.fp64.max_uniform_components

Reviewed-by: Matt Turner <mattst88@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103007
(cherry picked from commit 8620f7ebbc763dc1bbbc825d31cacfdd84433e05)
-rw-r--r--  src/intel/compiler/brw_vec4_nir.cpp  4
1 file changed, 3 insertions, 1 deletion
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 9bd1bbae3ed..65608d6cbc6 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -888,7 +888,9 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
if (const_offset) {
offset_reg = brw_imm_ud(const_offset->u32[0] & ~15);
} else {
- offset_reg = get_nir_src(instr->src[1], nir_type_uint32, 1);
+ offset_reg = src_reg(this, glsl_type::uint_type);
+ emit(MOV(dst_reg(offset_reg),
+ get_nir_src(instr->src[1], nir_type_uint32, 1)));
}
src_reg packed_consts;