summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com> 2014-04-04 16:51:59 +0300
committer: Matt Turner <mattst88@gmail.com> 2014-04-16 22:46:45 -0700
commit: 306ed81b9363721058c568244f9860c5c8c819f4 (patch)
tree: 18d335012e4762aeec37df3c2ed45e3169befd35
parent: 30c35d1dcb2fde19b1c968751fda5151b795d257 (diff)
i965: Add writes_accumulator flag
Our hardware has an "accumulator" register, which can be used to store intermediate results across multiple instructions. Many instructions can implicitly write a value to the accumulator in addition to their normal destination register. This is enabled by the "AccWrEn" flag. This patch introduces a new flag, inst->writes_accumulator, which allows us to express the AccWrEn notion in the IR. It also creates an ALU2_ACC macro to easily define emitters for instructions that implicitly write the accumulator. Previously, we only supported implicit accumulator writes from the ADDC, SUBB, and MACH instructions. We always enabled them on those instructions, and left them disabled for other instructions. To take advantage of the MAC (multiply-accumulate) instruction, we need to be able to set AccWrEn on other types of instructions. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Matt Turner <mattst88@gmail.com> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp17
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp7
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp94
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp17
8 files changed, 131 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index bff38f0d6e7..075857f7425 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -64,6 +64,8 @@ fs_inst::init()
/* This will be the case for almost all instructions. */
this->regs_written = 1;
+
+ this->writes_accumulator = false;
}
fs_inst::fs_inst()
@@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
}
+#define ALU2_ACC(op) \
+ fs_inst * \
+ fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \
+ { \
+ fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
+ inst->writes_accumulator = true; \
+ return inst; \
+ }
+
#define ALU3(op) \
fs_inst * \
fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
@@ -166,7 +177,7 @@ ALU1(RNDE)
ALU1(RNDZ)
ALU2(ADD)
ALU2(MUL)
-ALU2(MACH)
+ALU2_ACC(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
@@ -182,8 +193,8 @@ ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU3(MAD)
-ALU2(ADDC)
-ALU2(SUBB)
+ALU2_ACC(ADDC)
+ALU2_ACC(SUBB)
ALU2(SEL)
/** Gen4 predicated IF. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 6672f840fc5..dfeceb00619 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -72,13 +72,9 @@ fs_visitor::dead_code_eliminate()
if (!result_live) {
progress = true;
- switch (inst->opcode) {
- case BRW_OPCODE_ADDC:
- case BRW_OPCODE_SUBB:
- case BRW_OPCODE_MACH:
+ if (inst->writes_accumulator) {
inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
- break;
- default:
+ } else {
inst->opcode = BRW_OPCODE_NOP;
continue;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e590bdf4c58..1cf35b40ad2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
brw_set_flag_reg(p, 0, inst->flag_subreg);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
+ brw_set_acc_write_control(p, inst->writes_accumulator);
if (inst->force_uncompressed || dispatch_width == 8) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
brw_AVG(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
- brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_MAD:
@@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
break;
case BRW_OPCODE_ADDC:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_ADDC(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_SUBB:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_SUBB(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_BFE:
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index a9514594e86..5e4f2fe7478 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -742,6 +742,8 @@ fs_instruction_scheduler::is_compressed(fs_inst *inst)
void
fs_instruction_scheduler::calculate_deps()
{
+ const bool gen6plus = v->brw->gen >= 6;
+
/* Pre-register-allocation, this tracks the last write per VGRF (so
* different reg_offsets within it can interfere when they shouldn't).
* After register allocation, reg_offsets are gone and we track individual
@@ -750,6 +752,7 @@ fs_instruction_scheduler::calculate_deps()
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod[2] = { NULL, NULL };
+ schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
@@ -800,6 +803,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(last_fixed_grf_write, n);
}
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -822,6 +827,14 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod[inst->flag_subreg], n);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* write-after-write deps. */
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
@@ -854,6 +867,9 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -869,12 +885,22 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
last_conditional_mod[inst->flag_subreg] = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
+ last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
exec_node *node;
@@ -906,6 +932,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
add_dep(n, last_fixed_grf_write);
}
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -928,6 +956,14 @@ fs_instruction_scheduler::calculate_deps()
add_dep(n, last_conditional_mod[inst->flag_subreg]);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(n, last_accumulator_write);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
@@ -959,6 +995,8 @@ fs_instruction_scheduler::calculate_deps()
} else {
last_fixed_grf_write = n;
}
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -972,15 +1010,26 @@ fs_instruction_scheduler::calculate_deps()
if (inst->writes_flag()) {
last_conditional_mod[inst->flag_subreg] = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
}
void
vec4_instruction_scheduler::calculate_deps()
{
+ const bool gen6plus = v->brw->gen >= 6;
+
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
+ schedule_node *last_accumulator_write = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
@@ -1016,6 +1065,9 @@ vec4_instruction_scheduler::calculate_deps()
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(last_fixed_grf_write, n);
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ assert(last_accumulator_write);
+ add_dep(last_accumulator_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -1039,6 +1091,15 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod, n);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ assert(last_accumulator_write);
+ add_dep(last_accumulator_write, n);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* write-after-write deps. */
if (inst->dst.file == GRF) {
add_dep(last_grf_write[inst->dst.reg], n);
@@ -1049,6 +1110,9 @@ vec4_instruction_scheduler::calculate_deps()
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -1064,12 +1128,22 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(last_conditional_mod, n, 0);
last_conditional_mod = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ add_dep(last_accumulator_write, n);
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
+ last_accumulator_write = NULL;
last_fixed_grf_write = NULL;
exec_node *node;
@@ -1088,6 +1162,8 @@ vec4_instruction_scheduler::calculate_deps()
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(n, last_fixed_grf_write);
+ } else if (inst->src[i].is_accumulator() && gen6plus) {
+ add_dep(n, last_accumulator_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -1109,6 +1185,14 @@ vec4_instruction_scheduler::calculate_deps()
add_dep(n, last_conditional_mod);
}
+ if (inst->reads_accumulator_implicitly()) {
+ if (gen6plus) {
+ add_dep(n, last_accumulator_write);
+ } else {
+ add_barrier_deps(n);
+ }
+ }
+
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
@@ -1119,6 +1203,8 @@ vec4_instruction_scheduler::calculate_deps()
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
+ } else if (inst->dst.is_accumulator() && gen6plus) {
+ last_accumulator_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -1132,6 +1218,14 @@ vec4_instruction_scheduler::calculate_deps()
if (inst->writes_flag()) {
last_conditional_mod = n;
}
+
+ if (inst->writes_accumulator) {
+ if (gen6plus) {
+ last_accumulator_write = n;
+ } else {
+ add_barrier_deps(n);
+ }
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 9ef08e58456..e730ed02b18 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -60,6 +60,7 @@ public:
uint8_t predicate;
bool predicate_inverse;
+ bool writes_accumulator; /**< instruction implicitly writes accumulator */
};
enum instruction_scheduler_mode {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 8aa746d3630..daff3641119 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -350,19 +350,12 @@ try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
* accumulator as a side-effect. Instead just set the destination
* to the null register to free it.
*/
- switch (inst->opcode) {
- case BRW_OPCODE_ADDC:
- case BRW_OPCODE_SUBB:
- case BRW_OPCODE_MACH:
+ if (inst->writes_accumulator || inst->writes_flag()) {
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- break;
- default:
- if (inst->writes_flag()) {
- inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
- } else {
- inst->remove();
- }
+ } else {
+ inst->remove();
}
+
return true;
} else if (inst->dst.writemask != new_writemask) {
switch (inst->opcode) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a74514f512c..5f85d315c71 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -971,9 +971,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
- brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_MAD:
@@ -1077,15 +1075,11 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
break;
case BRW_OPCODE_ADDC:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_ADDC(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_SUBB:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_SUBB(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_BFE:
@@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions)
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
+ brw_set_acc_write_control(p, inst->writes_accumulator);
unsigned pre_emit_nr_insn = p->nr_insn;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index edace108f19..3a764424df8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v,
this->force_writemask_all = false;
this->no_dd_clear = false;
this->no_dd_check = false;
+ this->writes_accumulator = false;
this->conditional_mod = BRW_CONDITIONAL_NONE;
this->sampler = 0;
this->texture_offset = 0;
@@ -124,6 +125,16 @@ vec4_visitor::emit(enum opcode opcode)
src0, src1); \
}
+#define ALU2_ACC(op) \
+ vec4_instruction * \
+ vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
+ { \
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \
+ BRW_OPCODE_##op, dst, src0, src1); \
+ inst->writes_accumulator = true; \
+ return inst; \
+ }
+
#define ALU3(op) \
vec4_instruction * \
vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
@@ -143,7 +154,7 @@ ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(MUL)
-ALU2(MACH)
+ALU2_ACC(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
@@ -162,8 +173,8 @@ ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU3(MAD)
-ALU2(ADDC)
-ALU2(SUBB)
+ALU2_ACC(ADDC)
+ALU2_ACC(SUBB)
/** Gen4 predicated IF. */
vec4_instruction *