summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2013-04-04 14:10:18 -0700
committerEric Anholt <eric@anholt.net>2013-04-10 09:45:21 -0700
commitd5f7aebac2b1afbc5023cd114174860d8d763d06 (patch)
tree7c4757cbd462c2d7629bd9fed1000c42e3a1ce41
parent3badbf7f7fa4898c69545fea3c60ea29cf61ae3b (diff)
i965/vs: Use GRFs for pull constant offsets on gen7.
This allows the computation of the offset to get written directly into the message source. shader-db results: total instructions in shared programs: 3308390 -> 3283025 (-0.77%) instructions in affected programs: 442998 -> 417633 (-5.73%) No difference in GLB2.7 low res (n=9). Reviewed-by: Matt Turner <mattst88@gmail.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp45
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp18
6 files changed, 56 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 3d07c36b747..a13f9dc57ab 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -739,6 +739,7 @@ enum opcode {
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
+ VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
};
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 1a520391ec1..b3bd1b97667 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -498,6 +498,8 @@ brw_instruction_name(enum opcode op)
return "scratch_write";
case VS_OPCODE_PULL_CONSTANT_LOAD:
return "pull_constant_load";
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ return "pull_constant_load_gen7";
default:
/* Yes, this leaks. It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e470ac80ab5..67dd17a45e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -230,7 +230,13 @@ vec4_instruction::is_math()
bool
vec4_instruction::is_send_from_grf()
{
- return opcode == SHADER_OPCODE_SHADER_TIME_ADD;
+ switch (opcode) {
+ case SHADER_OPCODE_SHADER_TIME_ADD:
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ return true;
+ default:
+ return false;
+ }
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 8f130e15428..e286925a0ea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -544,6 +544,10 @@ private:
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+ void generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
struct brw_context *brw;
struct intel_context *intel;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e378f7fd5f0..963901c029a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,27 +558,11 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg index,
struct brw_reg offset)
{
+ assert(intel->gen <= 7);
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
- if (intel->gen == 7) {
- gen6_resolve_implied_move(p, &offset, inst->base_mrf);
- brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn, dst);
- brw_set_src0(p, insn, offset);
- brw_set_sampler_message(p, insn,
- surf_index,
- 0, /* LD message ignores sampler unit */
- GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
- 1, /* rlen */
- 1, /* mlen */
- false, /* no header */
- BRW_SAMPLER_SIMD_MODE_SIMD4X2,
- 0);
- return;
- }
-
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -614,6 +598,29 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
}
void
+vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
+{
+ assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+ brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, insn, dst);
+ brw_set_src0(p, insn, offset);
+ brw_set_sampler_message(p, insn,
+ surf_index.dw1.ud,
+ 0, /* LD message ignores sampler unit */
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+ 1, /* rlen */
+ 1, /* mlen */
+ false, /* no header */
+ BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+ 0);
+}
+
+void
vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
struct brw_reg dst,
struct brw_reg *src)
@@ -673,6 +680,10 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
generate_pull_constant_load(inst, dst, src[0], src[1]);
break;
+ case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ break;
+
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ca1cfe8b879..ed8e65deb33 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2880,10 +2880,20 @@ vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
vec4_instruction *load;
- load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
- temp, index, offset);
- load->base_mrf = 14;
- load->mlen = 1;
+ if (intel->gen >= 7) {
+ dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ grf_offset.type = offset.type;
+ emit_before(inst, MOV(grf_offset, offset));
+
+ load = new(mem_ctx) vec4_instruction(this,
+ VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ temp, index, src_reg(grf_offset));
+ } else {
+ load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+ temp, index, offset);
+ load->base_mrf = 14;
+ load->mlen = 1;
+ }
emit_before(inst, load);
}