summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 29
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 27
2 files changed, 38 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c60d0418678..703c3c5d8b4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -235,14 +235,33 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
exec_list instructions;
fs_inst *inst;
- fs_reg offset = fs_reg(this, glsl_type::uint_type);
- instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
-
if (intel->gen >= 7) {
+ /* We have our constant surface use a pitch of 4 bytes, so our index can
+ * be any component of a vector, and then we load 4 contiguous
+ * components starting from that.
+ *
+ * We break down the const_offset to a portion added to the variable
+ * offset and a portion done using reg_offset, which means that if you
+ * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
+ * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
+ * CSE can later notice that those loads are all the same and eliminate
+ * the redundant ones.
+ */
+ fs_reg vec4_offset = fs_reg(this, glsl_type::int_type);
+ instructions.push_tail(ADD(vec4_offset,
+ varying_offset, const_offset & ~3));
+
+ fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
- dst, surf_index, offset);
+ vec4_result, surf_index, vec4_offset);
instructions.push_tail(inst);
+
+ vec4_result.reg_offset += const_offset & 3;
+ instructions.push_tail(MOV(dst, vec4_result));
} else {
+ fs_reg offset = fs_reg(this, glsl_type::uint_type);
+ instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
+
int base_mrf = 13;
bool header_present = true;
@@ -313,7 +332,7 @@ fs_inst::equals(fs_inst *inst)
int
fs_inst::regs_written()
{
- if (is_tex())
+ if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
return 4;
/* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index a729569c840..bc1fef16b01 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -734,28 +734,29 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
- uint32_t msg_control, rlen, mlen;
+ uint32_t simd_mode, rlen, mlen;
if (dispatch_width == 16) {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
- mlen = rlen = 2;
+ mlen = 2;
+ rlen = 8;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
} else {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
- mlen = rlen = 1;
+ mlen = 1;
+ rlen = 4;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
}
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, offset);
- if (intel->gen < 6)
- send->header.destreg__conditionalmod = inst->base_mrf;
- brw_set_dp_read_message(p, send,
+ brw_set_sampler_message(p, send,
surf_index,
- msg_control,
- GEN7_DATAPORT_DC_DWORD_SCATTERED_READ,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 0, /* LD message ignores sampler unit */
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+ rlen,
mlen,
- inst->header_present,
- rlen);
+ false, /* no header */
+ simd_mode,
+ 0);
}
/**